Import Libraries¶
In [1]:
# Data manipulation and analysis
import pandas as pd
import numpy as np
import json
In [2]:
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
In [3]:
# Text processing
import re
import string
from collections import Counter
In [4]:
# NLP libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
Download NLTK Data¶
In [5]:
# Download required NLTK data
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt_tab')
[nltk_data] Downloading package punkt to /root/nltk_data... [nltk_data] Unzipping tokenizers/punkt.zip. [nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data] Unzipping corpora/stopwords.zip. [nltk_data] Downloading package wordnet to /root/nltk_data... [nltk_data] Downloading package omw-1.4 to /root/nltk_data... [nltk_data] Downloading package punkt_tab to /root/nltk_data... [nltk_data] Unzipping tokenizers/punkt_tab.zip.
Out[5]:
True
In [6]:
# Machine Learning - Traditional Models
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import VotingClassifier
In [7]:
# Metrics and Evaluation
from sklearn.metrics import (
accuracy_score, precision_score, recall_score, f1_score,
classification_report, confusion_matrix, roc_curve, auc,
roc_auc_score, precision_recall_curve, average_precision_score
)
from sklearn.preprocessing import label_binarize
In [8]:
# Set style for better visualizations
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
LOAD DATASETS¶
In [9]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [10]:
train_path = '/content/drive/MyDrive/Emotion_Text/Train_dataset.jsonl'
test_path = '/content/drive/MyDrive/Emotion_Text/Test_dataset.jsonl'
val_path = '/content/drive/MyDrive/Emotion_Text/Validation_dataset.jsonl'
In [11]:
def load_jsonl_to_df(path):
data = []
with open(path, 'r') as f:
for line in f:
line = line.strip()
if line:
try:
data.append(json.loads(line))
except json.JSONDecodeError as e:
print(f"Skipping malformed JSON line in {path}: {line} - Error: {e}")
return pd.DataFrame(data)
print("Loading train_df...")
train_df = load_jsonl_to_df(train_path)
print("Loading test_df...")
test_df = load_jsonl_to_df(test_path)
print("Loading val_df...")
val_df = load_jsonl_to_df(val_path)
print("DataFrames loaded successfully.")
Loading train_df... Loading test_df... Loading val_df... DataFrames loaded successfully.
In [12]:
import os
# List the contents of the Emotion_Text directory in your Google Drive
# This will help you verify the correct file names and paths.
drive_path = '/content/drive/MyDrive/Emotion_Text/'
if os.path.exists(drive_path):
print(f"Contents of {drive_path}:")
for item in os.listdir(drive_path):
print(f"- {item}")
else:
print(f"The directory '{drive_path}' does not exist. Please ensure your Google Drive is mounted and the path is correct.")
Contents of /content/drive/MyDrive/Emotion_Text/: - Test_dataset.jsonl - Train_dataset.jsonl - Validation_dataset.jsonl
In [13]:
train_df
Out[13]:
| text | label | |
|---|---|---|
| 0 | i am just so sick of feeling like this and i j... | 3 |
| 1 | i do know that i am not ready to feel any roma... | 2 |
| 2 | i would being feeling strange or nauseous so i... | 4 |
| 3 | i feel agitated of the time running so real slow | 4 |
| 4 | ive worked particularly hard with this year on... | 4 |
| ... | ... | ... |
| 15995 | i need to make time for me and for doing thing... | 2 |
| 15996 | i feel this especially when i think back to si... | 2 |
| 15997 | i have discovered that every time i feel like ... | 3 |
| 15998 | i am starting to feel sorry for the muppet who... | 0 |
| 15999 | i do get lucky i just feel nervous about how b... | 4 |
16000 rows × 2 columns
In [14]:
test_df
Out[14]:
| text | label | |
|---|---|---|
| 0 | i feel enraged by the amount of people partici... | 3 |
| 1 | i am so ready to learn more and feel invigorat... | 1 |
| 2 | i but i have to say i am loving my house and w... | 1 |
| 3 | i feel her kicking i stop and just focus on he... | 5 |
| 4 | i feel proud of how he quickly picks up the co... | 1 |
| ... | ... | ... |
| 1995 | i have more energy then the first weeks and fe... | 5 |
| 1996 | i feel slightly heartbroken | 0 |
| 1997 | i feel i can safely assume that either hes bee... | 0 |
| 1998 | i feel like an emotional pretzel at times twis... | 0 |
| 1999 | i feel festive leave a comment | 1 |
2000 rows × 2 columns
In [15]:
val_df
Out[15]:
| text | label | |
|---|---|---|
| 0 | i am alone in feeling uncertain | 4 |
| 1 | i want them to have time to get different loca... | 1 |
| 2 | i am also feeling a little unsure of how i fee... | 4 |
| 3 | i remember feeling as innocent as she looked t... | 1 |
| 4 | i am quite done with being made to feel like a... | 0 |
| ... | ... | ... |
| 1995 | im just gonna skip the gory details because by... | 2 |
| 1996 | i is distraught her eyes go so wide and glazed... | 5 |
| 1997 | i have spent a lot of my life feeling unhappy ... | 0 |
| 1998 | i remember watching and feeling a little surpr... | 5 |
| 1999 | im feeling very petty right now | 3 |
2000 rows × 2 columns
In [16]:
print(f"\nTotal Samples: {len(train_df) + len(val_df) + len(test_df)}")
Total Samples: 20000
In [17]:
# Emotion labels mapping
emotion_labels = {
0: 'Sadness',
1: 'Joy',
2: 'Love',
3: 'Anger',
4: 'Fear',
5: 'Surprise'
}
In [18]:
# Add emotion names to dataframes
train_df['emotion_name'] = train_df['label'].map(emotion_labels)
val_df['emotion_name'] = val_df['label'].map(emotion_labels)
test_df['emotion_name'] = test_df['label'].map(emotion_labels)
In [19]:
print("\n DATASET INFORMATION:")
train_df.info()
DATASET INFORMATION: <class 'pandas.core.frame.DataFrame'> RangeIndex: 16000 entries, 0 to 15999 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 text 16000 non-null object 1 label 16000 non-null int64 2 emotion_name 16000 non-null object dtypes: int64(1), object(2) memory usage: 375.1+ KB
In [20]:
train_df
Out[20]:
| text | label | emotion_name | |
|---|---|---|---|
| 0 | i am just so sick of feeling like this and i j... | 3 | Anger |
| 1 | i do know that i am not ready to feel any roma... | 2 | Love |
| 2 | i would being feeling strange or nauseous so i... | 4 | Fear |
| 3 | i feel agitated of the time running so real slow | 4 | Fear |
| 4 | ive worked particularly hard with this year on... | 4 | Fear |
| ... | ... | ... | ... |
| 15995 | i need to make time for me and for doing thing... | 2 | Love |
| 15996 | i feel this especially when i think back to si... | 2 | Love |
| 15997 | i have discovered that every time i feel like ... | 3 | Anger |
| 15998 | i am starting to feel sorry for the muppet who... | 0 | Sadness |
| 15999 | i do get lucky i just feel nervous about how b... | 4 | Fear |
16000 rows × 3 columns
BASIC DATA EXPLORATION¶
In [21]:
print("BASIC STATISTICS:")
print(train_df.describe())
BASIC STATISTICS:
label
count 16000.000000
mean 2.499938
std 1.707830
min 0.000000
25% 1.000000
50% 2.500000
75% 4.000000
max 5.000000
In [22]:
print("\n MISSING VALUES CHECK:")
print("-" * 40)
print(f"Training Set Missing Values:\n{train_df.isnull().sum()}")
print(f"\nValidation Set Missing Values:\n{val_df.isnull().sum()}")
print(f"\nTest Set Missing Values:\n{test_df.isnull().sum()}")
MISSING VALUES CHECK: ---------------------------------------- Training Set Missing Values: text 0 label 0 emotion_name 0 dtype: int64 Validation Set Missing Values: text 0 label 0 emotion_name 0 dtype: int64 Test Set Missing Values: text 0 label 0 emotion_name 0 dtype: int64
In [23]:
# Check for duplicates
print("DUPLICATE CHECK:")
print(f"Training Set Duplicates: {train_df.duplicated().sum()}")
print(f"Validation Set Duplicates: {val_df.duplicated().sum()}")
print(f"Test Set Duplicates: {test_df.duplicated().sum()}")
DUPLICATE CHECK: Training Set Duplicates: 0 Validation Set Duplicates: 0 Test Set Duplicates: 0
In [24]:
# Calculate distributions
train_dist = train_df['emotion_name'].value_counts().sort_index()
val_dist = val_df['emotion_name'].value_counts().sort_index()
test_dist = test_df['emotion_name'].value_counts().sort_index()
In [25]:
print("CLASS DISTRIBUTION:")
print("Training Set:")
print(train_dist)
print(f"\nValidation Set:")
print(val_dist)
print(f"\nTest Set:")
print(test_dist)
CLASS DISTRIBUTION: Training Set: emotion_name Anger 2667 Fear 2667 Joy 2666 Love 2667 Sadness 2667 Surprise 2666 Name: count, dtype: int64 Validation Set: emotion_name Anger 334 Fear 333 Joy 333 Love 333 Sadness 333 Surprise 334 Name: count, dtype: int64 Test Set: emotion_name Anger 333 Fear 334 Joy 334 Love 333 Sadness 333 Surprise 333 Name: count, dtype: int64
Visualization 1: Class Distribution - Bar Chart¶
In [26]:
# Visualization 1: Class Distribution - Bar Chart
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for idx, (df, title) in enumerate([(train_df, 'Training Set'),
(val_df, 'Validation Set'),
(test_df, 'Test Set')]):
emotion_counts = df['emotion_name'].value_counts()
axes[idx].bar(emotion_counts.index, emotion_counts.values,
color=sns.color_palette("husl", 6))
axes[idx].set_title(f'{title} - Class Distribution', fontsize=14, fontweight='bold')
axes[idx].set_xlabel('Emotion', fontsize=12)
axes[idx].set_ylabel('Count', fontsize=12)
axes[idx].tick_params(axis='x', rotation=45)
# Add value labels on bars
for i, v in enumerate(emotion_counts.values):
axes[idx].text(i, v + 50, str(v), ha='center', va='bottom', fontweight='bold')
plt.tight_layout()
plt.savefig('class_distribution_bar.png', dpi=300, bbox_inches='tight')
plt.show()
Visualization 2: Pie Chart - Training Set Distribution¶
In [27]:
# Visualization 2: Pie Chart - Training Set Distribution
fig, ax = plt.subplots(figsize=(10, 8))
colors = sns.color_palette("husl", 6)
wedges, texts, autotexts = ax.pie(
train_dist.values,
labels=train_dist.index,
autopct='%1.1f%%',
startangle=90,
colors=colors,
textprops={'fontsize': 12, 'fontweight': 'bold'}
)
ax.set_title('Training Set - Emotion Distribution', fontsize=16, fontweight='bold', pad=20)
plt.setp(autotexts, size=10, weight="bold", color="white")
plt.tight_layout()
plt.savefig('emotion_distribution_pie.png', dpi=300, bbox_inches='tight')
plt.show()
Calculate text lengths¶
In [28]:
# Calculate text lengths
train_df['text_length'] = train_df['text'].apply(len)
train_df['word_count'] = train_df['text'].apply(lambda x: len(x.split()))
print(" TEXT LENGTH STATISTICS:")
print(train_df[['text_length', 'word_count']].describe())
TEXT LENGTH STATISTICS:
text_length word_count
count 16000.000000 16000.000000
mean 98.013063 19.386937
std 56.306394 11.065345
min 6.000000 2.000000
25% 54.750000 11.000000
50% 87.000000 17.000000
75% 129.000000 26.000000
max 356.000000 66.000000
Visualization 3: Text Length Distribution by Emotion¶
In [29]:
# Visualization 3: Text Length Distribution by Emotion
fig, axes = plt.subplots(2, 1, figsize=(14, 10))
# Character length distribution
for emotion in emotion_labels.values():
data = train_df[train_df['emotion_name'] == emotion]['text_length']
axes[0].hist(data, alpha=0.5, label=emotion, bins=30)
axes[0].set_title('Character Length Distribution by Emotion', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Character Length', fontsize=12)
axes[0].set_ylabel('Frequency', fontsize=12)
axes[0].legend()
axes[0].grid(True, alpha=0.3)
# Word count distribution
for emotion in emotion_labels.values():
data = train_df[train_df['emotion_name'] == emotion]['word_count']
axes[1].hist(data, alpha=0.5, label=emotion, bins=30)
axes[1].set_title('Word Count Distribution by Emotion', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Word Count', fontsize=12)
axes[1].set_ylabel('Frequency', fontsize=12)
axes[1].legend()
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('text_length_distribution.png', dpi=300, bbox_inches='tight')
plt.show()
Visualization 4: Box Plot - Text Length by Emotion¶
In [30]:
# Visualization 4: Box Plot - Text Length by Emotion
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
# Character length box plot
train_df.boxplot(column='text_length', by='emotion_name', ax=axes[0])
axes[0].set_title('Character Length by Emotion', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Emotion', fontsize=12)
axes[0].set_ylabel('Character Length', fontsize=12)
axes[0].tick_params(axis='x', rotation=45)
plt.sca(axes[0])
plt.xticks(rotation=45)
# Word count box plot
train_df.boxplot(column='word_count', by='emotion_name', ax=axes[1])
axes[1].set_title('Word Count by Emotion', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Emotion', fontsize=12)
axes[1].set_ylabel('Word Count', fontsize=12)
axes[1].tick_params(axis='x', rotation=45)
plt.sca(axes[1])
plt.xticks(rotation=45)
plt.suptitle('') # Remove default title
plt.tight_layout()
plt.savefig('text_length_boxplot.png', dpi=300, bbox_inches='tight')
plt.show()
TEXT PREPROCESSING CLASS¶
In [31]:
class TextPreprocessor:
def __init__(self):
self.lemmatizer = WordNetLemmatizer()
self.stop_words = set(stopwords.words('english'))
# Keep negation words as they're important for emotion
self.stop_words -= {'not', 'no', 'nor', 'neither', 'never', 'none', 'nobody', 'nothing'}
def remove_urls(self, text):
"""Remove URLs from text"""
return re.sub(r'http\S+|www.\S+', '', text)
def remove_mentions(self, text):
"""Remove @ mentions"""
return re.sub(r'@\w+', '', text)
def process_hashtags(self, text):
"""Remove # but keep the text"""
return re.sub(r'#(\w+)', r'\1', text)
def remove_special_chars(self, text):
"""Remove special characters and punctuation"""
# Keep basic punctuation that might indicate emotion (! ?)
text = re.sub(r'[^\w\s!?]', ' ', text)
return text
def to_lowercase(self, text):
"""Convert text to lowercase"""
return text.lower()
def normalize_whitespace(self, text):
"""Normalize whitespace"""
return ' '.join(text.split())
def remove_numbers(self, text):
"""Remove numbers"""
return re.sub(r'\d+', '', text)
def tokenize(self, text):
"""Tokenize text"""
return word_tokenize(text)
def remove_stopwords(self, tokens):
"""Remove stopwords"""
return [token for token in tokens if token.lower() not in self.stop_words]
def lemmatize(self, tokens):
"""Lemmatize tokens"""
return [self.lemmatizer.lemmatize(token) for token in tokens]
def preprocess(self, text, keep_tokens=False):
# Step 1: Remove URLs
text = self.remove_urls(text)
# Step 2: Remove mentions
text = self.remove_mentions(text)
# Step 3: Process hashtags
text = self.process_hashtags(text)
# Step 4: Convert to lowercase
text = self.to_lowercase(text)
# Step 5: Remove special characters
text = self.remove_special_chars(text)
# Step 6: Remove numbers
text = self.remove_numbers(text)
# Step 7: Normalize whitespace
text = self.normalize_whitespace(text)
# Step 8: Tokenization
tokens = self.tokenize(text)
# Step 9: Remove stopwords
tokens = self.remove_stopwords(tokens)
# Step 10: Lemmatization
tokens = self.lemmatize(tokens)
# Remove empty tokens
tokens = [t for t in tokens if len(t) > 1]
if keep_tokens:
return tokens
else:
return ' '.join(tokens)
# Initialize preprocessor
preprocessor = TextPreprocessor()
In [32]:
print("\n Text Preprocessor initialized")
print("\nPreprocessing Steps:")
print(" 1. URL Removal")
print(" 2. Mention Removal (@username)")
print(" 3. Hashtag Processing (keep text, remove #)")
print(" 4. Lowercase Conversion")
print(" 5. Special Character Removal")
print(" 6. Number Removal")
print(" 7. Whitespace Normalization")
print(" 8. Tokenization")
print(" 9. Stopword Removal (keeping negations)")
print(" 10. Lemmatization")
Text Preprocessor initialized Preprocessing Steps: 1. URL Removal 2. Mention Removal (@username) 3. Hashtag Processing (keep text, remove #) 4. Lowercase Conversion 5. Special Character Removal 6. Number Removal 7. Whitespace Normalization 8. Tokenization 9. Stopword Removal (keeping negations) 10. Lemmatization
BEFORE AND AFTER PREPROCESSING¶
In [33]:
nltk.download('punkt_tab')
# Select sample tweets for demonstration
sample_texts = train_df.sample(5, random_state=42)['text'].tolist()
print("BEFORE AND AFTER PREPROCESSING:")
preprocessing_examples = []
for i, text in enumerate(sample_texts, 1):
cleaned = preprocessor.preprocess(text)
preprocessing_examples.append({
'Original': text,
'Cleaned': cleaned
})
print(f"\nExample {i}:")
print(f"Original: {text}")
print(f"Cleaned: {cleaned}")
BEFORE AND AFTER PREPROCESSING:
[nltk_data] Downloading package punkt_tab to /root/nltk_data... [nltk_data] Package punkt_tab is already up-to-date!
Example 1: Original: ive reverted back to my original blog title and purpose because honestly ive never strayed from my initial feeling about the delicate balance amp integration of woman and mother Cleaned: ive reverted back original blog title purpose honestly ive never strayed initial feeling delicate balance amp integration woman mother Example 2: Original: i feel hated i feel like i dont belong and more and more i feel that i want to die Cleaned: feel hated feel like dont belong feel want die Example 3: Original: i look at the wall ok feeling slightly irate Cleaned: look wall ok feeling slightly irate Example 4: Original: i can still feel a tender area when i touch it Cleaned: still feel tender area touch Example 5: Original: i wasnt really feeling up to reviewiing it on ba but i was pretty impressed Cleaned: wasnt really feeling reviewiing ba pretty impressed
Visualization of preprocessing effect¶
In [34]:
# Create visualization of preprocessing effect
fig, ax = plt.subplots(figsize=(12, 6))
original_lengths = [len(ex['Original']) for ex in preprocessing_examples]
cleaned_lengths = [len(ex['Cleaned']) for ex in preprocessing_examples]
x = np.arange(len(preprocessing_examples))
width = 0.35
bars1 = ax.bar(x - width/2, original_lengths, width, label='Original', alpha=0.8)
bars2 = ax.bar(x + width/2, cleaned_lengths, width, label='Cleaned', alpha=0.8)
ax.set_xlabel('Sample Tweets', fontsize=12)
ax.set_ylabel('Character Length', fontsize=12)
ax.set_title('Effect of Preprocessing on Text Length', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels([f'Tweet {i+1}' for i in range(len(preprocessing_examples))])
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('preprocessing_effect.png', dpi=300, bbox_inches='tight')
plt.show()
APPLY PREPROCESSING¶
In [35]:
# Apply preprocessing
print("Processing training set...")
train_df['cleaned_text'] = train_df['text'].apply(lambda x: preprocessor.preprocess(x))
print("Processing validation set...")
val_df['cleaned_text'] = val_df['text'].apply(lambda x: preprocessor.preprocess(x))
print("Processing test set...")
test_df['cleaned_text'] = test_df['text'].apply(lambda x: preprocessor.preprocess(x))
print("Preprocessing completed for all datasets")
Processing training set... Processing validation set... Processing test set... Preprocessing completed for all datasets
In [36]:
# Check for empty texts after preprocessing
empty_train = train_df[train_df['cleaned_text'].str.strip() == ''].shape[0]
empty_val = val_df[val_df['cleaned_text'].str.strip() == ''].shape[0]
empty_test = test_df[test_df['cleaned_text'].str.strip() == ''].shape[0]
print(f"\nEmpty texts after preprocessing:")
print(f" Training: {empty_train}")
print(f" Validation: {empty_val}")
print(f" Test: {empty_test}")
Empty texts after preprocessing: Training: 1 Validation: 0 Test: 0
In [37]:
if empty_train > 0:
train_df = train_df[train_df['cleaned_text'].str.strip() != '']
print(f" Removed {empty_train} empty texts from training set")
Removed 1 empty texts from training set
In [38]:
# Get all tokens
all_tokens = []
for text in train_df['cleaned_text']:
all_tokens.extend(text.split())
# Vocabulary statistics
vocab_size = len(set(all_tokens))
total_tokens = len(all_tokens)
avg_tokens_per_doc = total_tokens / len(train_df)
print(f" VOCABULARY STATISTICS:")
print(f" Total unique words (vocabulary size): {vocab_size:,}")
print(f" Total tokens: {total_tokens:,}")
print(f" Average tokens per document: {avg_tokens_per_doc:.2f}")
VOCABULARY STATISTICS: Total unique words (vocabulary size): 13,733 Total tokens: 152,849 Average tokens per document: 9.55
MOST COMMON WORDS¶
In [39]:
# Most common words
word_freq = Counter(all_tokens)
most_common = word_freq.most_common(30)
print(f" TOP 30 MOST FREQUENT WORDS:")
print("-" * 80)
for word, freq in most_common:
print(f" {word:.<30} {freq:>6,}")
TOP 30 MOST FREQUENT WORDS: -------------------------------------------------------------------------------- feel.......................... 11,298 feeling....................... 5,630 like.......................... 2,745 im............................ 2,312 not........................... 1,895 time.......................... 1,017 really........................ 991 know.......................... 908 get........................... 851 little........................ 811 people........................ 723 would......................... 683 still......................... 683 one........................... 678 thing......................... 658 make.......................... 644 think......................... 615 day........................... 606 ive........................... 602 life.......................... 595 even.......................... 594 want.......................... 591 way........................... 578 love.......................... 543 something..................... 537 bit........................... 525 much.......................... 521 dont.......................... 489 could......................... 479 going......................... 463
Visualization: Top words¶
In [40]:
# Visualization: Top words
fig, ax = plt.subplots(figsize=(14, 8))
words, freqs = zip(*most_common)
ax.barh(range(len(words)), freqs, color=sns.color_palette("viridis", len(words)))
ax.set_yticks(range(len(words)))
ax.set_yticklabels(words)
ax.set_xlabel('Frequency', fontsize=12)
ax.set_title('Top 30 Most Frequent Words After Preprocessing', fontsize=14, fontweight='bold')
ax.invert_yaxis()
ax.grid(True, alpha=0.3, axis='x')
for i, v in enumerate(freqs):
ax.text(v + 50, i, str(v), va='center', fontweight='bold')
plt.tight_layout()
plt.savefig('top_words_frequency.png', dpi=300, bbox_inches='tight')
plt.show()
In [41]:
# Get top words for each emotion
emotion_words = {}
for emotion in emotion_labels.values():
emotion_texts = train_df[train_df['emotion_name'] == emotion]['cleaned_text']
emotion_tokens = []
for text in emotion_texts:
emotion_tokens.extend(text.split())
emotion_word_freq = Counter(emotion_tokens)
emotion_words[emotion] = emotion_word_freq.most_common(10)
print(" TOP 10 WORDS FOR EACH EMOTION:")
for emotion, words in emotion_words.items():
print(f"\n{emotion}:")
for word, freq in words:
print(f" {word:.<25} {freq:>5,}")
TOP 10 WORDS FOR EACH EMOTION: Sadness: feel..................... 1,900 feeling.................. 935 like..................... 491 im....................... 387 not...................... 305 time..................... 192 know..................... 151 really................... 150 get...................... 150 little................... 128 Joy: feel..................... 1,984 feeling.................. 832 like..................... 475 im....................... 350 not...................... 301 really................... 167 time..................... 165 make..................... 142 know..................... 135 one...................... 130 Love: feel..................... 1,952 feeling.................. 873 like..................... 560 im....................... 367 not...................... 362 love..................... 200 really................... 172 one...................... 163 know..................... 162 time..................... 154 Anger: feel..................... 1,808 feeling.................. 966 like..................... 470 im....................... 447 not...................... 329 get...................... 209 really................... 197 know..................... 160 people................... 153 time..................... 150 Fear: feel..................... 1,769 feeling.................. 1,048 im....................... 414 like..................... 337 not...................... 334 little................... 210 time..................... 177 know..................... 161 really................... 141 get...................... 131 Surprise: feel..................... 1,885 feeling.................. 976 like..................... 412 im....................... 347 impressed................ 267 not...................... 264 weird.................... 258 overwhelmed.............. 258 amazing.................. 258 strange.................. 252
Visualization: Top words by emotion¶
In [42]:
# Visualization: Top words by emotion
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.ravel()
for idx, (emotion, words) in enumerate(emotion_words.items()):
word_list, freq_list = zip(*words)
axes[idx].barh(range(len(word_list)), freq_list,
color=sns.color_palette("husl", 6)[idx])
axes[idx].set_yticks(range(len(word_list)))
axes[idx].set_yticklabels(word_list)
axes[idx].set_xlabel('Frequency', fontsize=10)
axes[idx].set_title(f'{emotion} - Top Words', fontsize=12, fontweight='bold')
axes[idx].invert_yaxis()
axes[idx].grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.savefig('emotion_specific_words.png', dpi=300, bbox_inches='tight')
plt.show()
In [43]:
# Prepare data
X_train = train_df['cleaned_text'].values
y_train = train_df['label'].values
X_val = val_df['cleaned_text'].values
y_val = val_df['label'].values
X_test = test_df['cleaned_text'].values
y_test = test_df['label'].values
In [44]:
print(f"\n DATA SHAPES:")
print(f" Training: {X_train.shape} samples")
print(f" Validation: {X_val.shape} samples")
print(f" Test: {X_test.shape} samples")
DATA SHAPES: Training: (15999,) samples Validation: (2000,) samples Test: (2000,) samples
TF-IDF FEATURE EXTRACTION¶
In [45]:
# Initialize TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer(
max_features=5000,
ngram_range=(1, 2), # Unigrams and bigrams
min_df=2,
max_df=0.8,
sublinear_tf=True
)
In [46]:
print("\n Fitting TF-IDF vectorizer on training data...")
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_val_tfidf = tfidf_vectorizer.transform(X_val)
X_test_tfidf = tfidf_vectorizer.transform(X_test)
print(f"\n TF-IDF Vectorization completed")
print(f" Feature dimensions: {X_train_tfidf.shape[1]}")
print(f" Training matrix shape: {X_train_tfidf.shape}")
print(f" Validation matrix shape: {X_val_tfidf.shape}")
print(f" Test matrix shape: {X_test_tfidf.shape}")
Fitting TF-IDF vectorizer on training data... TF-IDF Vectorization completed Feature dimensions: 5000 Training matrix shape: (15999, 5000) Validation matrix shape: (2000, 5000) Test matrix shape: (2000, 5000)
MODEL 1: MULTINOMIAL NAIVE BAYES¶
In [47]:
# MODEL 1: MULTINOMIAL NAIVE BAYES
print("\n" + "=" * 80)
print("MODEL 1: MULTINOMIAL NAIVE BAYES")
print("=" * 80)
print("\n Training Multinomial Naive Bayes...")
mnb_model = MultinomialNB(alpha=1.0)
mnb_model.fit(X_train_tfidf, y_train)
================================================================================ MODEL 1: MULTINOMIAL NAIVE BAYES ================================================================================ Training Multinomial Naive Bayes...
Out[47]:
MultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MultinomialNB()
In [48]:
# Predictions
mnb_train_pred = mnb_model.predict(X_train_tfidf)
mnb_val_pred = mnb_model.predict(X_val_tfidf)
mnb_test_pred = mnb_model.predict(X_test_tfidf)
In [49]:
# Training accuracy
mnb_train_acc = accuracy_score(y_train, mnb_train_pred)
mnb_val_acc = accuracy_score(y_val, mnb_val_pred)
mnb_test_acc = accuracy_score(y_test, mnb_test_pred)
In [50]:
print(f"\n Naive Bayes Training Completed")
print(f"\n ACCURACY SCORES:")
print(f" Training Accuracy: {mnb_train_acc:.4f} ({mnb_train_acc*100:.2f}%)")
print(f" Validation Accuracy: {mnb_val_acc:.4f} ({mnb_val_acc*100:.2f}%)")
print(f" Test Accuracy: {mnb_test_acc:.4f} ({mnb_test_acc*100:.2f}%)")
Naive Bayes Training Completed ACCURACY SCORES: Training Accuracy: 0.9407 (94.07%) Validation Accuracy: 0.8755 (87.55%) Test Accuracy: 0.8835 (88.35%)
MODEL 1: MULTINOMIAL NAIVE BAYES (EVALUATION)¶
In [51]:
# Performance Metrics
def calculate_detailed_metrics(y_true, y_pred, model_name):
"""Calculate comprehensive performance metrics"""
metrics = {
'Model': model_name,
'Accuracy': accuracy_score(y_true, y_pred),
'Macro Precision': precision_score(y_true, y_pred, average='macro'),
'Macro Recall': recall_score(y_true, y_pred, average='macro'),
'Macro F1-Score': f1_score(y_true, y_pred, average='macro'),
'Weighted Precision': precision_score(y_true, y_pred, average='weighted'),
'Weighted Recall': recall_score(y_true, y_pred, average='weighted'),
'Weighted F1-Score': f1_score(y_true, y_pred, average='weighted'),
}
return metrics
In [52]:
# Calculate MNB metrics
mnb_metrics = calculate_detailed_metrics(y_test, mnb_test_pred, 'Multinomial Naive Bayes')
print("\n NAIVE BAYES - PERFORMACE METRICS:")
print("-" * 80)
for metric, value in mnb_metrics.items():
if metric != 'Model':
print(f" {metric:.<35} {value:.4f} ({value*100:.2f}%)")
NAIVE BAYES - PERFORMACE METRICS: -------------------------------------------------------------------------------- Accuracy........................... 0.8835 (88.35%) Macro Precision.................... 0.8862 (88.62%) Macro Recall....................... 0.8836 (88.36%) Macro F1-Score..................... 0.8829 (88.29%) Weighted Precision................. 0.8862 (88.62%) Weighted Recall.................... 0.8835 (88.35%) Weighted F1-Score.................. 0.8828 (88.28%)
In [53]:
# Per-class detailed metrics
from sklearn.metrics import precision_recall_fscore_support
mnb_prec, mnb_rec, mnb_f1, mnb_support = precision_recall_fscore_support(
y_test, mnb_test_pred, average=None, zero_division=0
)
mnb_per_class = pd.DataFrame({
'Emotion': list(emotion_labels.values()),
'Precision': mnb_prec,
'Recall': mnb_rec,
'F1-Score': mnb_f1,
'Support': mnb_support
})
print("\n NAIVE BAYES - PER-CLASS METRICS:")
print("-" * 80)
print(mnb_per_class.to_string(index=False))
NAIVE BAYES - PER-CLASS METRICS:
--------------------------------------------------------------------------------
Emotion Precision Recall F1-Score Support
Sadness 0.935897 0.876877 0.905426 333
Joy 0.909722 0.784431 0.842444 334
Love 0.871935 0.960961 0.914286 333
Anger 0.906832 0.876877 0.891603 333
Fear 0.860606 0.850299 0.855422 334
Surprise 0.832021 0.951952 0.887955 333
In [54]:
# Classification Report
print(f"\n CLASSIFICATION REPORT (Test Set):")
print("-" * 80)
mnb_report = classification_report(y_test, mnb_test_pred, target_names=list(emotion_labels.values()),digits=4)
print(mnb_report)
CLASSIFICATION REPORT (Test Set):
--------------------------------------------------------------------------------
precision recall f1-score support
Sadness 0.9359 0.8769 0.9054 333
Joy 0.9097 0.7844 0.8424 334
Love 0.8719 0.9610 0.9143 333
Anger 0.9068 0.8769 0.8916 333
Fear 0.8606 0.8503 0.8554 334
Surprise 0.8320 0.9520 0.8880 333
accuracy 0.8835 2000
macro avg 0.8862 0.8836 0.8829 2000
weighted avg 0.8862 0.8835 0.8828 2000
In [55]:
# Visualization: Metrics Bar Chart for MNB
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Precision
axes[0, 0].bar(mnb_per_class['Emotion'], mnb_per_class['Precision'],
color='skyblue', alpha=0.8, edgecolor='black')
axes[0, 0].set_title('Naive Bayes - Precision by Emotion', fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('Precision', fontsize=12)
axes[0, 0].set_ylim([0, 1.0])
axes[0, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(mnb_per_class['Precision']):
axes[0, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Recall
axes[0, 1].bar(mnb_per_class['Emotion'], mnb_per_class['Recall'],
color='lightgreen', alpha=0.8, edgecolor='black')
axes[0, 1].set_title('Naive Bayes - Recall by Emotion', fontsize=14, fontweight='bold')
axes[0, 1].set_ylabel('Recall', fontsize=12)
axes[0, 1].set_ylim([0, 1.0])
axes[0, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(mnb_per_class['Recall']):
axes[0, 1].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# F1-Score
axes[1, 0].bar(mnb_per_class['Emotion'], mnb_per_class['F1-Score'],
color='salmon', alpha=0.8, edgecolor='black')
axes[1, 0].set_title('Naive Bayes - F1-Score by Emotion', fontsize=14, fontweight='bold')
axes[1, 0].set_ylabel('F1-Score', fontsize=12)
axes[1, 0].set_ylim([0, 1.0])
axes[1, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(mnb_per_class['F1-Score']):
axes[1, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Support
axes[1, 1].bar(mnb_per_class['Emotion'], mnb_per_class['Support'],
color='plum', alpha=0.8, edgecolor='black')
axes[1, 1].set_title('Naive Bayes - Sample Support by Emotion', fontsize=14, fontweight='bold')
axes[1, 1].set_ylabel('Number of Samples', fontsize=12)
axes[1, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(mnb_per_class['Support']):
axes[1, 1].text(i, v + 10, f'{int(v)}', ha='center', fontweight='bold')
plt.suptitle('Multinomial Naive Bayes - Detailed Performance Metrics',
fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('mnb_detailed_metrics.png', dpi=300, bbox_inches='tight')
plt.show()
In [56]:
# Confusion Matrix
mnb_cm = confusion_matrix(y_test, mnb_test_pred)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(mnb_cm, annot=True, fmt='d', cmap='Blues',
xticklabels=list(emotion_labels.values()),
yticklabels=list(emotion_labels.values()),
cbar_kws={'label': 'Count'})
ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
ax.set_title('Multinomial Naive Bayes - Confusion Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('mnb_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()
In [57]:
# Binarize labels
y_test_bin = label_binarize(y_test, classes=[0, 1, 2, 3, 4, 5])
n_classes = y_test_bin.shape[1]
# Predict probabilities
mnb_proba = mnb_model.predict_proba(X_test_tfidf)
# Colors for each class
colors = sns.color_palette("husl", n_classes)
# Create a 3x3 grid of subplots
fig, axes = plt.subplots(3, 3, figsize=(18, 15))
axes = axes.flatten()
for i, (color, emotion) in enumerate(zip(colors, emotion_labels.values())):
precision, recall, _ = precision_recall_curve(y_test_bin[:, i], mnb_proba[:, i])
avg_precision = average_precision_score(y_test_bin[:, i], mnb_proba[:, i])
ax = axes[i]
ax.plot(recall, precision, color=color, lw=2,
label=f'{emotion} (AP = {avg_precision:.3f})')
ax.set_xlabel('Recall', fontsize=10)
ax.set_ylabel('Precision', fontsize=10)
ax.set_title(f'{emotion}', fontsize=12, fontweight='bold')
ax.legend(loc="best", fontsize=8)
ax.grid(True, alpha=0.3)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
# Hide unused subplots if n_classes < 9
for j in range(i+1, len(axes)):
fig.delaxes(axes[j])
plt.tight_layout()
plt.savefig('mnb_precision_recall_grid.png', dpi=300, bbox_inches='tight')
plt.show()
Sample Predictions for MNB¶
In [58]:
# Sample Predictions for MNB
print("\n NAIVE BAYES - SAMPLE PREDICTIONS:")
print("-" * 100)
sample_indices = np.random.choice(len(test_df), 10, replace=False)
mnb_samples = []
for idx in sample_indices:
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
true_label = y_test[idx]
pred_label = mnb_test_pred[idx]
text_tfidf = tfidf_vectorizer.transform([test_df.loc[actual_idx, 'cleaned_text']])
probabilities = mnb_model.predict_proba(text_tfidf)[0]
confidence = probabilities[pred_label]
mnb_samples.append({
'Text': text[:60] + '...' if len(text) > 60 else text,
'True': emotion_labels[true_label],
'Predicted': emotion_labels[pred_label],
'Confidence': f'{confidence:.2%}',
'Correct': '100%' if true_label == pred_label else '0%'
})
mnb_samples_df = pd.DataFrame(mnb_samples)
print(mnb_samples_df.to_string(index=False))
NAIVE BAYES - SAMPLE PREDICTIONS:
----------------------------------------------------------------------------------------------------
Text True Predicted Confidence Correct
i wake up in the morning after a great night with my dear hu... Anger Anger 51.23% 100%
i have a strong feeling were going to come back from this to... Sadness Joy 25.30% 0%
i feel so hopeless and usually just want o scream Sadness Sadness 55.08% 100%
i can think of jim and all that we had and not feel that col... Anger Anger 46.45% 100%
i feel frightened in a kind of a raw way Fear Fear 82.78% 100%
i imagine that the school bullying that followed made everyt... Sadness Sadness 45.42% 100%
i feel is fans are really supporting me and telling me im st... Love Love 33.76% 100%
i feel as though at any moment i could lose my salvation bec... Sadness Sadness 27.37% 100%
when a friend of mine keeps telling me morbid things that ha... Anger Anger 24.89% 100%
i dont know why i feel surprised at the difficulty of the te... Surprise Surprise 62.61% 100%
MODEL 2: LOGISTIC REGRESSION¶
In [59]:
# MODEL 2: LOGISTIC REGRESSION
print("\n" + "=" * 80)
print("MODEL 2: LOGISTIC REGRESSION")
print("=" * 80)
print("\n Training Logistic Regression...")
lr_model = LogisticRegression(
max_iter=1000,
C=1.0,
penalty='l2',
solver='lbfgs',
random_state=42,
n_jobs=-1
)
lr_model.fit(X_train_tfidf, y_train)
================================================================================ MODEL 2: LOGISTIC REGRESSION ================================================================================ Training Logistic Regression...
Out[59]:
LogisticRegression(max_iter=1000, n_jobs=-1, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(max_iter=1000, n_jobs=-1, random_state=42)
In [60]:
# Predictions
lr_train_pred = lr_model.predict(X_train_tfidf)
lr_val_pred = lr_model.predict(X_val_tfidf)
lr_test_pred = lr_model.predict(X_test_tfidf)
In [61]:
# Accuracy
lr_train_acc = accuracy_score(y_train, lr_train_pred)
lr_val_acc = accuracy_score(y_val, lr_val_pred)
lr_test_acc = accuracy_score(y_test, lr_test_pred)
In [62]:
print(f"\n Logistic Regression Training Completed")
print(f"\n ACCURACY SCORES:")
print(f" Training Accuracy: {lr_train_acc:.4f} ({lr_train_acc*100:.2f}%)")
print(f" Validation Accuracy: {lr_val_acc:.4f} ({lr_val_acc*100:.2f}%)")
print(f" Test Accuracy: {lr_test_acc:.4f} ({lr_test_acc*100:.2f}%)")
Logistic Regression Training Completed ACCURACY SCORES: Training Accuracy: 0.9566 (95.66%) Validation Accuracy: 0.9040 (90.40%) Test Accuracy: 0.9160 (91.60%)
MODEL 2 - LOGISTIC REGRESSION (EVALUATION)¶
In [63]:
# Calculate LR metrics
lr_metrics = calculate_detailed_metrics(y_test, lr_test_pred, 'Logistic Regression')
print("\n LOGISTIC REGRESSION - PERFORMANCE METRICS:")
print("-" * 80)
for metric, value in lr_metrics.items():
if metric != 'Model':
print(f" {metric:.<35} {value:.4f} ({value*100:.2f}%)")
# Per-class detailed metrics
lr_prec, lr_rec, lr_f1, lr_support = precision_recall_fscore_support(
y_test, lr_test_pred, average=None, zero_division=0
)
lr_per_class = pd.DataFrame({
'Emotion': list(emotion_labels.values()),
'Precision': lr_prec,
'Recall': lr_rec,
'F1-Score': lr_f1,
'Support': lr_support
})
print("\n LOGISTIC REGRESSION - PER-CLASS METRICS:")
print("-" * 80)
print(lr_per_class.to_string(index=False))
LOGISTIC REGRESSION - PERFORMANCE METRICS:
--------------------------------------------------------------------------------
Accuracy........................... 0.9160 (91.60%)
Macro Precision.................... 0.9166 (91.66%)
Macro Recall....................... 0.9161 (91.61%)
Macro F1-Score..................... 0.9155 (91.55%)
Weighted Precision................. 0.9166 (91.66%)
Weighted Recall.................... 0.9160 (91.60%)
Weighted F1-Score.................. 0.9155 (91.55%)
LOGISTIC REGRESSION - PER-CLASS METRICS:
--------------------------------------------------------------------------------
Emotion Precision Recall F1-Score Support
Sadness 0.936170 0.924925 0.930514 333
Joy 0.931373 0.853293 0.890625 334
Love 0.913408 0.981982 0.946454 333
Anger 0.940439 0.900901 0.920245 333
Fear 0.880734 0.862275 0.871407 334
Surprise 0.897507 0.972973 0.933718 333
In [64]:
# Classification Report
print(f"\n CLASSIFICATION REPORT (Test Set):")
print("-" * 80)
lr_report = classification_report(y_test, lr_test_pred,
target_names=list(emotion_labels.values()),
digits=4)
print(lr_report)
CLASSIFICATION REPORT (Test Set):
--------------------------------------------------------------------------------
precision recall f1-score support
Sadness 0.9362 0.9249 0.9305 333
Joy 0.9314 0.8533 0.8906 334
Love 0.9134 0.9820 0.9465 333
Anger 0.9404 0.9009 0.9202 333
Fear 0.8807 0.8623 0.8714 334
Surprise 0.8975 0.9730 0.9337 333
accuracy 0.9160 2000
macro avg 0.9166 0.9161 0.9155 2000
weighted avg 0.9166 0.9160 0.9155 2000
In [65]:
# Visualization: Metrics Bar Chart for LR
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Precision
axes[0, 0].bar(lr_per_class['Emotion'], lr_per_class['Precision'],
color='skyblue', alpha=0.8, edgecolor='black')
axes[0, 0].set_title('Logistic Regression - Precision by Emotion', fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('Precision', fontsize=12)
axes[0, 0].set_ylim([0, 1.0])
axes[0, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(lr_per_class['Precision']):
axes[0, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Recall
axes[0, 1].bar(lr_per_class['Emotion'], lr_per_class['Recall'],
color='lightgreen', alpha=0.8, edgecolor='black')
axes[0, 1].set_title('Logistic Regression - Recall by Emotion', fontsize=14, fontweight='bold')
axes[0, 1].set_ylabel('Recall', fontsize=12)
axes[0, 1].set_ylim([0, 1.0])
axes[0, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(lr_per_class['Recall']):
axes[0, 1].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# F1-Score
axes[1, 0].bar(lr_per_class['Emotion'], lr_per_class['F1-Score'],
color='salmon', alpha=0.8, edgecolor='black')
axes[1, 0].set_title('Logistic Regression - F1-Score by Emotion', fontsize=14, fontweight='bold')
axes[1, 0].set_ylabel('F1-Score', fontsize=12)
axes[1, 0].set_ylim([0, 1.0])
axes[1, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(lr_per_class['F1-Score']):
axes[1, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Support
axes[1, 1].bar(lr_per_class['Emotion'], lr_per_class['Support'],
color='plum', alpha=0.8, edgecolor='black')
axes[1, 1].set_title('Logistic Regression - Sample Support by Emotion', fontsize=14, fontweight='bold')
axes[1, 1].set_ylabel('Number of Samples', fontsize=12)
axes[1, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(lr_per_class['Support']):
axes[1, 1].text(i, v + 10, f'{int(v)}', ha='center', fontweight='bold')
plt.suptitle('Logistic Regression - Detailed Performance Metrics',
fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('lr_detailed_metrics.png', dpi=300, bbox_inches='tight')
plt.show()
In [66]:
# Confusion Matrix
lr_cm = confusion_matrix(y_test, lr_test_pred)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(lr_cm, annot=True, fmt='d', cmap='Greens',
xticklabels=list(emotion_labels.values()),
yticklabels=list(emotion_labels.values()),
cbar_kws={'label': 'Count'})
ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
ax.set_title('Logistic Regression - Confusion Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('lr_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()
In [67]:
# Predict probabilities
lr_proba = lr_model.predict_proba(X_test_tfidf)
# Number of classes
n_classes = len(emotion_labels)
# Create a 3x3 grid of subplots
fig, axes = plt.subplots(3, 3, figsize=(18, 15))
axes = axes.flatten() # flatten to easily index
for i, (color, emotion) in enumerate(zip(colors, emotion_labels.values())):
precision, recall, _ = precision_recall_curve(y_test_bin[:, i], lr_proba[:, i])
avg_precision = average_precision_score(y_test_bin[:, i], lr_proba[:, i])
ax = axes[i]
ax.plot(recall, precision, color=color, lw=2,
label=f'{emotion} (AP = {avg_precision:.3f})')
ax.set_xlabel('Recall', fontsize=10)
ax.set_ylabel('Precision', fontsize=10)
ax.set_title(f'{emotion}', fontsize=12, fontweight='bold')
ax.legend(loc="best", fontsize=8)
ax.grid(True, alpha=0.3)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
for j in range(i+1, len(axes)):
fig.delaxes(axes[j])
plt.tight_layout()
plt.savefig('lr_precision_recall_grid.png', dpi=300, bbox_inches='tight')
plt.show()
Sample Predictions for LOGISTIC REGRESSION¶
In [68]:
# Sample Predictions for LR
print("\nLOGISTIC REGRESSION - SAMPLE PREDICTIONS:")
print("-" * 80)
lr_samples = []
for idx in sample_indices:
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
true_label = y_test[idx]
pred_label = lr_test_pred[idx]
text_tfidf = tfidf_vectorizer.transform([test_df.loc[actual_idx, 'cleaned_text']])
probabilities = lr_model.predict_proba(text_tfidf)[0]
confidence = probabilities[pred_label]
lr_samples.append({
'Text': text[:60] + '...' if len(text) > 60 else text,
'True': emotion_labels[true_label],
'Predicted': emotion_labels[pred_label],
'Confidence': f'{confidence:.2%}',
'Correct': 'YES' if true_label == pred_label else 'NO'
})
lr_samples_df = pd.DataFrame(lr_samples)
print(lr_samples_df.to_string(index=False))
LOGISTIC REGRESSION - SAMPLE PREDICTIONS:
--------------------------------------------------------------------------------
Text True Predicted Confidence Correct
i wake up in the morning after a great night with my dear hu... Anger Anger 60.74% YES
i have a strong feeling were going to come back from this to... Sadness Joy 34.47% NO
i feel so hopeless and usually just want o scream Sadness Sadness 77.94% YES
i can think of jim and all that we had and not feel that col... Anger Anger 73.95% YES
i feel frightened in a kind of a raw way Fear Fear 93.77% YES
i imagine that the school bullying that followed made everyt... Sadness Sadness 58.83% YES
i feel is fans are really supporting me and telling me im st... Love Love 68.08% YES
i feel as though at any moment i could lose my salvation bec... Sadness Sadness 45.10% YES
when a friend of mine keeps telling me morbid things that ha... Anger Anger 33.93% YES
i dont know why i feel surprised at the difficulty of the te... Surprise Surprise 87.83% YES
MODEL 3: SUPPORT VECTOR MACHINE (SVM)¶
In [69]:
# MODEL 3: SUPPORT VECTOR MACHINE (SVM)
print("\n" + "=" * 80)
print("MODEL 3: SUPPORT VECTOR MACHINE (LinearSVC)")
print("=" * 80)
print("\n Training SVM (this may take a few minutes)...")
svm_model = LinearSVC(
C=1.0,
max_iter=2000,
random_state=42,
dual=False
)
svm_model.fit(X_train_tfidf, y_train)
================================================================================ MODEL 3: SUPPORT VECTOR MACHINE (LinearSVC) ================================================================================ Training SVM (this may take a few minutes)...
Out[69]:
LinearSVC(dual=False, max_iter=2000, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearSVC(dual=False, max_iter=2000, random_state=42)
In [70]:
# Predictions
svm_train_pred = svm_model.predict(X_train_tfidf)
svm_val_pred = svm_model.predict(X_val_tfidf)
svm_test_pred = svm_model.predict(X_test_tfidf)
In [71]:
# Accuracy
svm_train_acc = accuracy_score(y_train, svm_train_pred)
svm_val_acc = accuracy_score(y_val, svm_val_pred)
svm_test_acc = accuracy_score(y_test, svm_test_pred)
In [72]:
print(f"\n SVM Training Completed")
print(f"\n ACCURACY SCORES:")
print(f" Training Accuracy: {svm_train_acc:.4f} ({svm_train_acc*100:.2f}%)")
print(f" Validation Accuracy: {svm_val_acc:.4f} ({svm_val_acc*100:.2f}%)")
print(f" Test Accuracy: {svm_test_acc:.4f} ({svm_test_acc*100:.2f}%)")
SVM Training Completed ACCURACY SCORES: Training Accuracy: 0.9792 (97.92%) Validation Accuracy: 0.9090 (90.90%) Test Accuracy: 0.9130 (91.30%)
MODEL 3: SUPPORT VECTOR MACHINE (EVALUATION)¶
In [73]:
# Calculate SVM metrics
svm_metrics = calculate_detailed_metrics(y_test, svm_test_pred, 'Support Vector Machine')
print("\nSVM - PERFORMANCE METRICS:")
print("-" * 80)
for metric, value in svm_metrics.items():
if metric != 'Model':
print(f" {metric:.<35} {value:.4f} ({value*100:.2f}%)")
# Per-class detailed metrics
svm_prec, svm_rec, svm_f1, svm_support = precision_recall_fscore_support(
y_test, svm_test_pred, average=None, zero_division=0
)
svm_per_class = pd.DataFrame({
'Emotion': list(emotion_labels.values()),
'Precision': svm_prec,
'Recall': svm_rec,
'F1-Score': svm_f1,
'Support': svm_support
})
print("\nSVM - PER-CLASS METRICS:")
print("-" * 80)
print(svm_per_class.to_string(index=False))
SVM - PERFORMANCE METRICS:
--------------------------------------------------------------------------------
Accuracy........................... 0.9130 (91.30%)
Macro Precision.................... 0.9134 (91.34%)
Macro Recall....................... 0.9131 (91.31%)
Macro F1-Score..................... 0.9126 (91.26%)
Weighted Precision................. 0.9134 (91.34%)
Weighted Recall.................... 0.9130 (91.30%)
Weighted F1-Score.................. 0.9126 (91.26%)
SVM - PER-CLASS METRICS:
--------------------------------------------------------------------------------
Emotion Precision Recall F1-Score Support
Sadness 0.939024 0.924925 0.931921 333
Joy 0.922330 0.853293 0.886470 334
Love 0.917379 0.966967 0.941520 333
Anger 0.928349 0.894895 0.911315 333
Fear 0.879154 0.871257 0.875188 334
Surprise 0.894444 0.966967 0.929293 333
In [74]:
# Visualization: Metrics Bar Chart for SVM
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Precision
axes[0, 0].bar(svm_per_class['Emotion'], svm_per_class['Precision'],
color='skyblue', alpha=0.8, edgecolor='black')
axes[0, 0].set_title('SVM - Precision by Emotion', fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('Precision', fontsize=12)
axes[0, 0].set_ylim([0, 1.0])
axes[0, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(svm_per_class['Precision']):
axes[0, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Recall
axes[0, 1].bar(svm_per_class['Emotion'], svm_per_class['Recall'],
color='lightgreen', alpha=0.8, edgecolor='black')
axes[0, 1].set_title('SVM - Recall by Emotion', fontsize=14, fontweight='bold')
axes[0, 1].set_ylabel('Recall', fontsize=12)
axes[0, 1].set_ylim([0, 1.0])
axes[0, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(svm_per_class['Recall']):
axes[0, 1].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# F1-Score
axes[1, 0].bar(svm_per_class['Emotion'], svm_per_class['F1-Score'],
color='salmon', alpha=0.8, edgecolor='black')
axes[1, 0].set_title('SVM - F1-Score by Emotion', fontsize=14, fontweight='bold')
axes[1, 0].set_ylabel('F1-Score', fontsize=12)
axes[1, 0].set_ylim([0, 1.0])
axes[1, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(svm_per_class['F1-Score']):
axes[1, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Support
axes[1, 1].bar(svm_per_class['Emotion'], svm_per_class['Support'],
color='plum', alpha=0.8, edgecolor='black')
axes[1, 1].set_title('SVM - Sample Support by Emotion', fontsize=14, fontweight='bold')
axes[1, 1].set_ylabel('Number of Samples', fontsize=12)
axes[1, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(svm_per_class['Support']):
axes[1, 1].text(i, v + 10, f'{int(v)}', ha='center', fontweight='bold')
plt.suptitle('Support Vector Machine - Detailed Performance Metrics',
fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('svm_detailed_metrics.png', dpi=300, bbox_inches='tight')
plt.show()
In [75]:
# Classification Report
print(f"\n CLASSIFICATION REPORT (Test Set):")
print("-" * 80)
svm_report = classification_report(y_test, svm_test_pred,
target_names=list(emotion_labels.values()),
digits=4)
print(svm_report)
CLASSIFICATION REPORT (Test Set):
--------------------------------------------------------------------------------
precision recall f1-score support
Sadness 0.9390 0.9249 0.9319 333
Joy 0.9223 0.8533 0.8865 334
Love 0.9174 0.9670 0.9415 333
Anger 0.9283 0.8949 0.9113 333
Fear 0.8792 0.8713 0.8752 334
Surprise 0.8944 0.9670 0.9293 333
accuracy 0.9130 2000
macro avg 0.9134 0.9131 0.9126 2000
weighted avg 0.9134 0.9130 0.9126 2000
In [76]:
# Confusion Matrix
svm_cm = confusion_matrix(y_test, svm_test_pred)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(svm_cm, annot=True, fmt='d', cmap='Oranges',
xticklabels=list(emotion_labels.values()),
yticklabels=list(emotion_labels.values()),
cbar_kws={'label': 'Count'})
ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
ax.set_title('Support Vector Machine - Confusion Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('svm_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()
In [77]:
from scipy.special import softmax
# Get decision scores and convert to probabilities
svm_decision = svm_model.decision_function(X_test_tfidf)
svm_proba = softmax(svm_decision, axis=1)
# Number of classes
n_classes = y_test_bin.shape[1]
# Colors for each class
colors = sns.color_palette("husl", n_classes)
# Create a 3x3 grid of subplots
fig, axes = plt.subplots(3, 3, figsize=(18, 15))
axes = axes.flatten()
for i, (color, emotion) in enumerate(zip(colors, emotion_labels.values())):
precision, recall, _ = precision_recall_curve(y_test_bin[:, i], svm_proba[:, i])
avg_precision = average_precision_score(y_test_bin[:, i], svm_proba[:, i])
ax = axes[i]
ax.plot(recall, precision, color=color, lw=2,
label=f'{emotion} (AP = {avg_precision:.3f})')
ax.set_xlabel('Recall', fontsize=10)
ax.set_ylabel('Precision', fontsize=10)
ax.set_title(f'{emotion}', fontsize=12, fontweight='bold')
ax.legend(loc="best", fontsize=8)
ax.grid(True, alpha=0.3)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
# Hide unused subplots if n_classes < 9
for j in range(i+1, len(axes)):
fig.delaxes(axes[j])
plt.tight_layout()
plt.savefig('svm_precision_recall_grid.png', dpi=300, bbox_inches='tight')
plt.show()
Sample Predictions for SVM¶
In [78]:
# Sample Predictions for SVM
print("\n SVM - SAMPLE PREDICTIONS:")
print("-" * 80)
svm_samples = []
for idx in sample_indices:
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
true_label = y_test[idx]
pred_label = svm_test_pred[idx]
text_tfidf = tfidf_vectorizer.transform([test_df.loc[actual_idx, 'cleaned_text']])
decision_values = svm_model.decision_function(text_tfidf)[0]
probabilities = softmax(decision_values)
confidence = probabilities[pred_label]
svm_samples.append({
'Text': text[:60] + '...' if len(text) > 60 else text,
'True': emotion_labels[true_label],
'Predicted': emotion_labels[pred_label],
'Confidence': f'{confidence:.2%}',
'Correct': 'YES' if true_label == pred_label else 'NO'
})
svm_samples_df = pd.DataFrame(svm_samples)
print(svm_samples_df.to_string(index=False))
SVM - SAMPLE PREDICTIONS:
--------------------------------------------------------------------------------
Text True Predicted Confidence Correct
i wake up in the morning after a great night with my dear hu... Anger Anger 46.41% YES
i have a strong feeling were going to come back from this to... Sadness Sadness 26.39% YES
i feel so hopeless and usually just want o scream Sadness Sadness 64.00% YES
i can think of jim and all that we had and not feel that col... Anger Anger 66.59% YES
i feel frightened in a kind of a raw way Fear Fear 87.58% YES
i imagine that the school bullying that followed made everyt... Sadness Sadness 46.98% YES
i feel is fans are really supporting me and telling me im st... Love Love 65.84% YES
i feel as though at any moment i could lose my salvation bec... Sadness Sadness 33.12% YES
when a friend of mine keeps telling me morbid things that ha... Anger Anger 28.82% YES
i dont know why i feel surprised at the difficulty of the te... Surprise Surprise 77.12% YES
COMPARISON OF TRADITIONAL ML MODELS¶
In [79]:
print("COMPARISON OF TRADITIONAL ML MODELS")
# Create comparison DataFrame
comparison_data = {
'Model': ['Naive Bayes', 'Logistic Regression', 'SVM'],
'Train Accuracy': [mnb_train_acc, lr_train_acc, svm_train_acc],
'Validation Accuracy': [mnb_val_acc, lr_val_acc, svm_val_acc],
'Test Accuracy': [mnb_test_acc, lr_test_acc, svm_test_acc]
}
comparison_df = pd.DataFrame(comparison_data)
print("\n MODEL ACCURACY COMPARISON:")
print("-" * 80)
print(comparison_df.to_string(index=False))
COMPARISON OF TRADITIONAL ML MODELS
MODEL ACCURACY COMPARISON:
--------------------------------------------------------------------------------
Model Train Accuracy Validation Accuracy Test Accuracy
Naive Bayes 0.940684 0.8755 0.8835
Logistic Regression 0.956622 0.9040 0.9160
SVM 0.979186 0.9090 0.9130
Visualization: Model Comparison¶
In [80]:
# Visualization: Model Comparison
fig, ax = plt.subplots(figsize=(12, 7))
x = np.arange(len(comparison_df['Model']))
width = 0.25
bars1 = ax.bar(x - width, comparison_df['Train Accuracy'], width,
label='Train', alpha=0.8, color='skyblue')
bars2 = ax.bar(x, comparison_df['Validation Accuracy'], width,
label='Validation', alpha=0.8, color='lightgreen')
bars3 = ax.bar(x + width, comparison_df['Test Accuracy'], width,
label='Test', alpha=0.8, color='salmon')
ax.set_xlabel('Models', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
ax.set_title('Traditional ML Models - Accuracy Comparison', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(comparison_df['Model'])
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
ax.set_ylim([0, 1.0])
# Add value labels
for bars in [bars1, bars2, bars3]:
for bar in bars:
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontsize=9, fontweight='bold')
plt.tight_layout()
plt.savefig('ml_models_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
In [81]:
# Per-class performance comparison
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.ravel()
# Get per-class metrics for each model
from sklearn.metrics import precision_recall_fscore_support
models_data = [
('Naive Bayes', mnb_test_pred),
('Logistic Regression', lr_test_pred),
('SVM', svm_test_pred)
]
for idx, emotion_label in enumerate(emotion_labels.values()):
emotion_idx = list(emotion_labels.values()).index(emotion_label)
precisions = []
recalls = []
f1_scores = []
for model_name, predictions in models_data:
prec, rec, f1, _ = precision_recall_fscore_support(
y_test, predictions, average=None, zero_division=0
)
precisions.append(prec[emotion_idx])
recalls.append(rec[emotion_idx])
f1_scores.append(f1[emotion_idx])
x = np.arange(len(models_data))
width = 0.25
axes[idx].bar(x - width, precisions, width, label='Precision', alpha=0.8)
axes[idx].bar(x, recalls, width, label='Recall', alpha=0.8)
axes[idx].bar(x + width, f1_scores, width, label='F1-Score', alpha=0.8)
axes[idx].set_xlabel('Models', fontsize=10)
axes[idx].set_ylabel('Score', fontsize=10)
axes[idx].set_title(f'{emotion_label} - Metrics', fontsize=12, fontweight='bold')
axes[idx].set_xticks(x)
axes[idx].set_xticklabels([m[0] for m in models_data], rotation=15, ha='right')
axes[idx].legend(fontsize=9)
axes[idx].set_ylim([0, 1.0])
axes[idx].grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('per_class_metrics_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
MODEL 4 - ENSEMBLE (TRAINING)¶
In [82]:
print("\n Creating ensemble model (Soft Voting)...")
# Create ensemble with the three traditional ML models
ensemble_model = VotingClassifier(
estimators=[
('naive_bayes', mnb_model),
('logistic_regression', lr_model),
('svm', svm_model)
],
voting='hard', # Use hard voting (majority vote)
n_jobs=-1
)
Creating ensemble model (Soft Voting)...
In [83]:
# Train ensemble model
print(" Training ensemble model...")
ensemble_model.fit(X_train_tfidf, y_train)
Training ensemble model...
Out[83]:
VotingClassifier(estimators=[('naive_bayes', MultinomialNB()),
('logistic_regression',
LogisticRegression(max_iter=1000, n_jobs=-1,
random_state=42)),
('svm',
LinearSVC(dual=False, max_iter=2000,
random_state=42))],
n_jobs=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
VotingClassifier(estimators=[('naive_bayes', MultinomialNB()),
('logistic_regression',
LogisticRegression(max_iter=1000, n_jobs=-1,
random_state=42)),
('svm',
LinearSVC(dual=False, max_iter=2000,
random_state=42))],
n_jobs=-1)MultinomialNB()
LogisticRegression(max_iter=1000, n_jobs=-1, random_state=42)
LinearSVC(dual=False, max_iter=2000, random_state=42)
In [84]:
# Predictions
ensemble_train_pred = ensemble_model.predict(X_train_tfidf)
ensemble_val_pred = ensemble_model.predict(X_val_tfidf)
ensemble_test_pred = ensemble_model.predict(X_test_tfidf)
In [85]:
# Accuracy
ensemble_train_acc = accuracy_score(y_train, ensemble_train_pred)
ensemble_val_acc = accuracy_score(y_val, ensemble_val_pred)
ensemble_test_acc = accuracy_score(y_test, ensemble_test_pred)
In [86]:
print(f"\n Ensemble Model Training Completed")
print(f"\n ACCURACY SCORES:")
print(f" Training Accuracy: {ensemble_train_acc:.4f} ({ensemble_train_acc*100:.2f}%)")
print(f" Validation Accuracy: {ensemble_val_acc:.4f} ({ensemble_val_acc*100:.2f}%)")
print(f" Test Accuracy: {ensemble_test_acc:.4f} ({ensemble_test_acc*100:.2f}%)")
Ensemble Model Training Completed ACCURACY SCORES: Training Accuracy: 0.9617 (96.17%) Validation Accuracy: 0.9060 (90.60%) Test Accuracy: 0.9170 (91.70%)
MODEL 4 - ENSEMBLE (EVALUATION)¶
In [87]:
# Calculate Ensemble metrics
ensemble_metrics = {
'Model': 'Ensemble (Voting Classifier)',
'Accuracy': accuracy_score(y_test, ensemble_test_pred),
'Macro Precision': precision_score(y_test, ensemble_test_pred, average='macro'),
'Macro Recall': recall_score(y_test, ensemble_test_pred, average='macro'),
'Macro F1-Score': f1_score(y_test, ensemble_test_pred, average='macro'),
'Weighted Precision': precision_score(y_test, ensemble_test_pred, average='weighted'),
'Weighted Recall': recall_score(y_test, ensemble_test_pred, average='weighted'),
'Weighted F1-Score': f1_score(y_test, ensemble_test_pred, average='weighted'),
}
print("\n ENSEMBLE MODEL - COMPREHENSIVE METRICS:")
print("-" * 80)
for metric, value in ensemble_metrics.items():
if metric != 'Model':
print(f" {metric:.<35} {value:.4f} ({value*100:.2f}%)")
# Per-class detailed metrics
ensemble_prec, ensemble_rec, ensemble_f1, ensemble_support = precision_recall_fscore_support(
y_test, ensemble_test_pred, average=None, zero_division=0
)
ensemble_per_class = pd.DataFrame({
'Emotion': list(emotion_labels.values()),
'Precision': ensemble_prec,
'Recall': ensemble_rec,
'F1-Score': ensemble_f1,
'Support': ensemble_support
})
print("\n ENSEMBLE MODEL - PER-CLASS METRICS:")
print("-" * 80)
print(ensemble_per_class.to_string(index=False))
ENSEMBLE MODEL - COMPREHENSIVE METRICS:
--------------------------------------------------------------------------------
Accuracy........................... 0.9170 (91.70%)
Macro Precision.................... 0.9177 (91.77%)
Macro Recall....................... 0.9171 (91.71%)
Macro F1-Score..................... 0.9165 (91.65%)
Weighted Precision................. 0.9177 (91.77%)
Weighted Recall.................... 0.9170 (91.70%)
Weighted F1-Score.................. 0.9164 (91.64%)
ENSEMBLE MODEL - PER-CLASS METRICS:
--------------------------------------------------------------------------------
Emotion Precision Recall F1-Score Support
Sadness 0.948012 0.930931 0.939394 333
Joy 0.931148 0.850299 0.888889 334
Love 0.913408 0.981982 0.946454 333
Anger 0.934783 0.903904 0.919084 333
Fear 0.888889 0.862275 0.875380 334
Surprise 0.890110 0.972973 0.929699 333
In [88]:
# Visualization: Metrics Bar Chart for Ensemble
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Precision
axes[0, 0].bar(ensemble_per_class['Emotion'], ensemble_per_class['Precision'],
color='skyblue', alpha=0.8, edgecolor='black')
axes[0, 0].set_title('Ensemble - Precision by Emotion', fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('Precision', fontsize=12)
axes[0, 0].set_ylim([0, 1.0])
axes[0, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(ensemble_per_class['Precision']):
axes[0, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Recall
axes[0, 1].bar(ensemble_per_class['Emotion'], ensemble_per_class['Recall'],
color='lightgreen', alpha=0.8, edgecolor='black')
axes[0, 1].set_title('Ensemble - Recall by Emotion', fontsize=14, fontweight='bold')
axes[0, 1].set_ylabel('Recall', fontsize=12)
axes[0, 1].set_ylim([0, 1.0])
axes[0, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[0, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(ensemble_per_class['Recall']):
axes[0, 1].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# F1-Score
axes[1, 0].bar(ensemble_per_class['Emotion'], ensemble_per_class['F1-Score'],
color='salmon', alpha=0.8, edgecolor='black')
axes[1, 0].set_title('Ensemble - F1-Score by Emotion', fontsize=14, fontweight='bold')
axes[1, 0].set_ylabel('F1-Score', fontsize=12)
axes[1, 0].set_ylim([0, 1.0])
axes[1, 0].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(ensemble_per_class['F1-Score']):
axes[1, 0].text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold')
# Support
axes[1, 1].bar(ensemble_per_class['Emotion'], ensemble_per_class['Support'],
color='plum', alpha=0.8, edgecolor='black')
axes[1, 1].set_title('Ensemble - Sample Support by Emotion', fontsize=14, fontweight='bold')
axes[1, 1].set_ylabel('Number of Samples', fontsize=12)
axes[1, 1].grid(True, alpha=0.3, axis='y')
plt.setp(axes[1, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
for i, v in enumerate(ensemble_per_class['Support']):
axes[1, 1].text(i, v + 10, f'{int(v)}', ha='center', fontweight='bold')
plt.suptitle('Ensemble Model - Detailed Performance Metrics',
fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('ensemble_detailed_metrics.png', dpi=300, bbox_inches='tight')
plt.show()
In [89]:
# Classification Report
print(f"\n CLASSIFICATION REPORT (Test Set):")
print("-" * 80)
ensemble_report = classification_report(y_test, ensemble_test_pred,
target_names=list(emotion_labels.values()),
digits=4)
print(ensemble_report)
CLASSIFICATION REPORT (Test Set):
--------------------------------------------------------------------------------
precision recall f1-score support
Sadness 0.9480 0.9309 0.9394 333
Joy 0.9311 0.8503 0.8889 334
Love 0.9134 0.9820 0.9465 333
Anger 0.9348 0.9039 0.9191 333
Fear 0.8889 0.8623 0.8754 334
Surprise 0.8901 0.9730 0.9297 333
accuracy 0.9170 2000
macro avg 0.9177 0.9171 0.9165 2000
weighted avg 0.9177 0.9170 0.9164 2000
In [90]:
# Confusion Matrix
ensemble_cm = confusion_matrix(y_test, ensemble_test_pred)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(ensemble_cm, annot=True, fmt='d', cmap='Purples',
xticklabels=list(emotion_labels.values()),
yticklabels=list(emotion_labels.values()),
cbar_kws={'label': 'Count'})
ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
ax.set_title('Ensemble Model - Confusion Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('ensemble_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()
In [91]:
# Voting Analysis - Show how each model voted
print("\n ENSEMBLE VOTING ANALYSIS:")
print("-" * 80)
# Get predictions from each base model
mnb_preds = mnb_model.predict(X_test_tfidf)
lr_preds = lr_model.predict(X_test_tfidf)
svm_preds = svm_model.predict(X_test_tfidf)
# Analyze voting patterns
voting_agreement = []
for i in range(len(y_test)):
votes = [mnb_preds[i], lr_preds[i], svm_preds[i]]
unique_votes = len(set(votes))
if unique_votes == 1:
agreement = 'Unanimous (3/3)'
elif unique_votes == 2:
agreement = 'Majority (2/3)'
else:
agreement = 'Split (1/1/1)'
voting_agreement.append(agreement)
voting_counts = pd.Series(voting_agreement).value_counts()
print("\nVoting Agreement Distribution:")
for agreement, count in voting_counts.items():
percentage = count / len(y_test) * 100
print(f" {agreement:.<25} {count:>5} ({percentage:.2f}%)")
# Visualization: Voting Agreement
fig, ax = plt.subplots(figsize=(10, 7))
colors_vote = ['#2ecc71', '#f39c12', '#e74c3c']
wedges, texts, autotexts = ax.pie(
voting_counts.values,
labels=voting_counts.index,
autopct='%1.1f%%',
startangle=90,
colors=colors_vote,
textprops={'fontsize': 12, 'fontweight': 'bold'}
)
ax.set_title('Ensemble Voting Agreement Distribution', fontsize=14, fontweight='bold', pad=20)
plt.setp(autotexts, size=11, weight="bold", color="white")
plt.tight_layout()
plt.savefig('ensemble_voting_agreement.png', dpi=300, bbox_inches='tight')
plt.show()
ENSEMBLE VOTING ANALYSIS: -------------------------------------------------------------------------------- Voting Agreement Distribution: Unanimous (3/3).......... 1824 (91.20%) Majority (2/3)........... 173 (8.65%) Split (1/1/1)............ 3 (0.15%)
SAMPLE PREDICTIONS WITH VOTING BREAKDOWN¶
In [92]:
# Sample Predictions with Individual Model Votes
print("\n ENSEMBLE - SAMPLE PREDICTIONS WITH VOTING BREAKDOWN:")
print("=" * 150)
sample_indices = np.random.choice(len(test_df), 10, replace=False)
ensemble_samples = []
for idx in sample_indices:
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
true_label = y_test[idx]
ensemble_pred = ensemble_test_pred[idx]
# Get individual predictions
mnb_pred = mnb_preds[idx]
lr_pred = lr_preds[idx]
svm_pred = svm_preds[idx]
print(f"\nSample {len(ensemble_samples) + 1}:")
print(f"Text: {text}")
print(f"True Emotion: {emotion_labels[true_label]}")
print(f"Voting Breakdown:")
print(f" Naive Bayes: {emotion_labels[mnb_pred]}")
print(f" Logistic Regression: {emotion_labels[lr_pred]}")
print(f" SVM: {emotion_labels[svm_pred]}")
print(f"Final Prediction: {emotion_labels[ensemble_pred]} {'CORRECT' if true_label == ensemble_pred else 'INCORRECT'}")
ensemble_samples.append({
'Text': text[:50] + '...' if len(text) > 50 else text,
'True': emotion_labels[true_label],
'MNB': emotion_labels[mnb_pred],
'LR': emotion_labels[lr_pred],
'SVM': emotion_labels[svm_pred],
'Ensemble': emotion_labels[ensemble_pred],
'Correct': 'TRUE' if true_label == ensemble_pred else 'FALSE'
})
ensemble_samples_df = pd.DataFrame(ensemble_samples)
print("\n" + "=" * 100)
print("SUMMARY TABLE:")
print(ensemble_samples_df.to_string(index=False))
ENSEMBLE - SAMPLE PREDICTIONS WITH VOTING BREAKDOWN:
======================================================================================================================================================
Sample 1:
Text: i feel terrified worried concerned confused frustrated impatient
True Emotion: Fear
Voting Breakdown:
Naive Bayes: Fear
Logistic Regression: Fear
SVM: Fear
Final Prediction: Fear CORRECT
Sample 2:
Text: i feel humiliated to have it revealed outside of the house itself
True Emotion: Sadness
Voting Breakdown:
Naive Bayes: Sadness
Logistic Regression: Sadness
SVM: Sadness
Final Prediction: Sadness CORRECT
Sample 3:
Text: i feel glad to have let this go i dont have to bother with them they arent in my life that is empowering liberating
True Emotion: Joy
Voting Breakdown:
Naive Bayes: Joy
Logistic Regression: Joy
SVM: Joy
Final Prediction: Joy CORRECT
Sample 4:
Text: i have a feeling sims will either be horrible or awesome
True Emotion: Sadness
Voting Breakdown:
Naive Bayes: Sadness
Logistic Regression: Sadness
SVM: Sadness
Final Prediction: Sadness CORRECT
Sample 5:
Text: im in toal agreement with his views because i feel just that way about my own beloved partner
True Emotion: Love
Voting Breakdown:
Naive Bayes: Love
Logistic Regression: Love
SVM: Love
Final Prediction: Love CORRECT
Sample 6:
Text: i feel so furious and vengeful when children are harmed
True Emotion: Anger
Voting Breakdown:
Naive Bayes: Anger
Logistic Regression: Anger
SVM: Anger
Final Prediction: Anger CORRECT
Sample 7:
Text: ive feeling slightly bitchy so there we go
True Emotion: Anger
Voting Breakdown:
Naive Bayes: Anger
Logistic Regression: Anger
SVM: Anger
Final Prediction: Anger CORRECT
Sample 8:
Text: i only can feels that you are really so gentle and full filled with the sense of security
True Emotion: Love
Voting Breakdown:
Naive Bayes: Love
Logistic Regression: Love
SVM: Love
Final Prediction: Love CORRECT
Sample 9:
Text: i did a quick check feeling rather hesitant to type vagina emoticon into a search engine though it might make a rather arresting name for an emo band
True Emotion: Fear
Voting Breakdown:
Naive Bayes: Surprise
Logistic Regression: Fear
SVM: Fear
Final Prediction: Fear CORRECT
Sample 10:
Text: i can imagine that cabin that feeling of security within its shaky walls
True Emotion: Fear
Voting Breakdown:
Naive Bayes: Fear
Logistic Regression: Fear
SVM: Fear
Final Prediction: Fear CORRECT
====================================================================================================
SUMMARY TABLE:
Text True MNB LR SVM Ensemble Correct
i feel terrified worried concerned confused frustr... Fear Fear Fear Fear Fear TRUE
i feel humiliated to have it revealed outside of t... Sadness Sadness Sadness Sadness Sadness TRUE
i feel glad to have let this go i dont have to bot... Joy Joy Joy Joy Joy TRUE
i have a feeling sims will either be horrible or a... Sadness Sadness Sadness Sadness Sadness TRUE
im in toal agreement with his views because i feel... Love Love Love Love Love TRUE
i feel so furious and vengeful when children are h... Anger Anger Anger Anger Anger TRUE
ive feeling slightly bitchy so there we go Anger Anger Anger Anger Anger TRUE
i only can feels that you are really so gentle and... Love Love Love Love Love TRUE
i did a quick check feeling rather hesitant to typ... Fear Surprise Fear Fear Fear TRUE
i can imagine that cabin that feeling of security ... Fear Fear Fear Fear Fear TRUE
MODEL COMPARISION¶
In [93]:
# comparison DataFrame
comparison_data_updated = {
'Model': ['Naive Bayes', 'Logistic Regression', 'SVM', 'Ensemble'],
'Train Accuracy': [mnb_train_acc, lr_train_acc, svm_train_acc, ensemble_train_acc],
'Validation Accuracy': [mnb_val_acc, lr_val_acc, svm_val_acc, ensemble_val_acc],
'Test Accuracy': [mnb_test_acc, lr_test_acc, svm_test_acc, ensemble_test_acc]
}
comparison_df_updated = pd.DataFrame(comparison_data_updated)
print("\n MODEL ACCURACY COMPARISON:")
print("-" * 80)
print(comparison_df_updated.to_string(index=False))
MODEL ACCURACY COMPARISON:
--------------------------------------------------------------------------------
Model Train Accuracy Validation Accuracy Test Accuracy
Naive Bayes 0.940684 0.8755 0.8835
Logistic Regression 0.956622 0.9040 0.9160
SVM 0.979186 0.9090 0.9130
Ensemble 0.961685 0.9060 0.9170
In [94]:
# Visualization: Updated Model Comparison
fig, ax = plt.subplots(figsize=(14, 7))
x = np.arange(len(comparison_df_updated['Model']))
width = 0.25
bars1 = ax.bar(x - width, comparison_df_updated['Train Accuracy'], width,
label='Train', alpha=0.8, color='skyblue')
bars2 = ax.bar(x, comparison_df_updated['Validation Accuracy'], width,
label='Validation', alpha=0.8, color='lightgreen')
bars3 = ax.bar(x + width, comparison_df_updated['Test Accuracy'], width,
label='Test', alpha=0.8, color='salmon')
ax.set_xlabel('Models', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
ax.set_title('All Traditional ML Models - Accuracy Comparison',
fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(comparison_df_updated['Model'])
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
ax.set_ylim([0, 1.0])
# Add value labels
for bars in [bars1, bars2, bars3]:
for bar in bars:
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontsize=9, fontweight='bold')
plt.tight_layout()
plt.savefig('all_ml_models_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
In [95]:
# ROC CURVES - MULTICLASS
# Binarize the labels for ROC curve
y_test_bin = label_binarize(y_test, classes=[0, 1, 2, 3, 4, 5])
n_classes = y_test_bin.shape[1]
# Naive Bayes probabilities
mnb_proba = mnb_model.predict_proba(X_test_tfidf)
# Logistic Regression probabilities
lr_proba = lr_model.predict_proba(X_test_tfidf)
# For SVM, we need to use decision_function and normalize
svm_decision = svm_model.decision_function(X_test_tfidf)
# Normalize SVM decision function to probabilities
from scipy.special import softmax
svm_proba = softmax(svm_decision, axis=1)
# Function to plot ROC curves
def plot_roc_curves(y_true_bin, y_proba, model_name, colors):
"""Plot ROC curves for multiclass classification"""
fig, ax = plt.subplots(figsize=(10, 8))
# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], y_proba[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
# Plot ROC curves
emotion_names = list(emotion_labels.values())
for i, emotion in enumerate(emotion_names):
ax.plot(fpr[i], tpr[i], color=colors[i], lw=2,
label=f'{emotion} (AUC = {roc_auc[i]:.3f})')
# Plot diagonal
ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax.set_title(f'{model_name} - ROC Curves', fontsize=14, fontweight='bold')
ax.legend(loc="lower right", fontsize=10)
ax.grid(True, alpha=0.3)
return fig, roc_auc
# Define colors for emotions
emotion_colors = sns.color_palette("husl", 6)
# Plot ROC for Naive Bayes
print("\n Plotting ROC curves for Naive Bayes...")
fig_mnb, mnb_auc = plot_roc_curves(y_test_bin, mnb_proba,
'Multinomial Naive Bayes', emotion_colors)
plt.tight_layout()
plt.savefig('mnb_roc_curves.png', dpi=300, bbox_inches='tight')
plt.show()
Plotting ROC curves for Naive Bayes...
In [96]:
# Plot ROC for Logistic Regression
print("\n Plotting ROC curves for Logistic Regression...")
fig_lr, lr_auc = plot_roc_curves(y_test_bin, lr_proba,
'Logistic Regression', emotion_colors)
plt.tight_layout()
plt.savefig('lr_roc_curves.png', dpi=300, bbox_inches='tight')
plt.show()
Plotting ROC curves for Logistic Regression...
In [97]:
# Plot ROC for SVM
print("\n Plotting ROC curves for SVM...")
fig_svm, svm_auc = plot_roc_curves(y_test_bin, svm_proba,
'Support Vector Machine', emotion_colors)
plt.tight_layout()
plt.savefig('svm_roc_curves.png', dpi=300, bbox_inches='tight')
plt.show()
Plotting ROC curves for SVM...
AUC SCORE COMPARISION¶
In [98]:
# Create comparison DataFrame for AUC scores
auc_comparison = pd.DataFrame({
'Emotion': list(emotion_labels.values()),
'Naive Bayes': [mnb_auc[i] for i in range(n_classes)],
'Logistic Regression': [lr_auc[i] for i in range(n_classes)],
'SVM': [svm_auc[i] for i in range(n_classes)]
})
print("\n AUC SCORES BY EMOTION:")
print("-" * 80)
print(auc_comparison.to_string(index=False))
AUC SCORES BY EMOTION:
--------------------------------------------------------------------------------
Emotion Naive Bayes Logistic Regression SVM
Sadness 0.985679 0.991151 0.989382
Joy 0.970199 0.977783 0.977269
Love 0.992130 0.994689 0.995051
Anger 0.987862 0.994848 0.994772
Fear 0.978737 0.976669 0.976051
Surprise 0.986911 0.990130 0.990568
In [99]:
# Calculate macro-average AUC
macro_auc = {
'Naive Bayes': np.mean(list(mnb_auc.values())),
'Logistic Regression': np.mean(list(lr_auc.values())),
'SVM': np.mean(list(svm_auc.values()))
}
print("\n MACRO-AVERAGE AUC:")
print("-" * 80)
for model, auc_score in macro_auc.items():
print(f" {model:.<30} {auc_score:.4f}")
MACRO-AVERAGE AUC: -------------------------------------------------------------------------------- Naive Bayes................... 0.9836 Logistic Regression........... 0.9875 SVM........................... 0.9872
In [100]:
# Visualization: AUC Comparison
fig, ax = plt.subplots(figsize=(14, 8))
x = np.arange(len(auc_comparison['Emotion']))
width = 0.25
bars1 = ax.bar(x - width, auc_comparison['Naive Bayes'], width,
label='Naive Bayes', alpha=0.8)
bars2 = ax.bar(x, auc_comparison['Logistic Regression'], width,
label='Logistic Regression', alpha=0.8)
bars3 = ax.bar(x + width, auc_comparison['SVM'], width,
label='SVM', alpha=0.8)
ax.set_xlabel('Emotion', fontsize=12, fontweight='bold')
ax.set_ylabel('AUC Score', fontsize=12, fontweight='bold')
ax.set_title('ROC-AUC Scores Comparison by Emotion', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(auc_comparison['Emotion'], rotation=15, ha='right')
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
ax.set_ylim([0, 1.0])
# Add value labels
for bars in [bars1, bars2, bars3]:
for bar in bars:
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontsize=8, fontweight='bold')
plt.tight_layout()
plt.savefig('auc_scores_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
MODEL COMPARISION STATISTICS¶
In [101]:
# Create comprehensive summary
summary_data = {
'Model': ['Naive Bayes', 'Logistic Regression', 'SVM', 'Ensemble'],
'Test Accuracy': [mnb_test_acc, lr_test_acc, svm_test_acc, ensemble_test_acc],
'Macro Avg F1': [
f1_score(y_test, mnb_test_pred, average='macro'),
f1_score(y_test, lr_test_pred, average='macro'),
f1_score(y_test, svm_test_pred, average='macro'),
f1_score(y_test, ensemble_test_pred, average='macro')
],
'Macro Avg Precision': [
precision_score(y_test, mnb_test_pred, average='macro'),
precision_score(y_test, lr_test_pred, average='macro'),
precision_score(y_test, svm_test_pred, average='macro'),
precision_score(y_test, ensemble_test_pred, average='macro')
],
'Macro Avg Recall': [
recall_score(y_test, mnb_test_pred, average='macro'),
recall_score(y_test, lr_test_pred, average='macro'),
recall_score(y_test, svm_test_pred, average='macro'),
recall_score(y_test, ensemble_test_pred, average='macro')
],
'Macro Avg AUC': [
macro_auc['Naive Bayes'],
macro_auc['Logistic Regression'],
macro_auc['SVM'],
np.nan # Ensemble doesn't have probabilities for AUC
]
}
summary_df = pd.DataFrame(summary_data)
print("\n COMPREHENSIVE MODEL SUMMARY:")
print("-" * 100)
print(summary_df.to_string(index=False))
COMPREHENSIVE MODEL SUMMARY:
----------------------------------------------------------------------------------------------------
Model Test Accuracy Macro Avg F1 Macro Avg Precision Macro Avg Recall Macro Avg AUC
Naive Bayes 0.8835 0.882856 0.886169 0.883566 0.983586
Logistic Regression 0.9160 0.915494 0.916605 0.916058 0.987545
SVM 0.9130 0.912618 0.913447 0.913051 0.987182
Ensemble 0.9170 0.916483 0.917725 0.917061 NaN
CUSTOM PREDICTION SYSTEM FOR ALL MODELS¶
In [102]:
# CUSTOM PREDICTION SYSTEM FOR ALL MODELS
class EmotionPredictor:
"""
Complete emotion prediction system for all models
"""
def __init__(self, preprocessor, vectorizer, models, emotion_labels):
self.preprocessor = preprocessor
self.vectorizer = vectorizer
self.models = models
self.emotion_labels = emotion_labels
def predict_single_model(self, text, model_name):
"""Predict emotion using a single model"""
# Preprocess
cleaned_text = self.preprocessor.preprocess(text)
text_tfidf = self.vectorizer.transform([cleaned_text])
# Get model
model = self.models[model_name]
# Predict
prediction = model.predict(text_tfidf)[0]
# Get probabilities if available
if hasattr(model, 'predict_proba'):
probabilities = model.predict_proba(text_tfidf)[0]
elif hasattr(model, 'decision_function'):
from scipy.special import softmax
decision = model.decision_function(text_tfidf)[0]
probabilities = softmax(decision)
else:
probabilities = None
return prediction, probabilities
def predict_all_models(self, text, show_details=True):
"""Predict using all models and show comparison"""
results = {}
for model_name in self.models.keys():
prediction, probabilities = self.predict_single_model(text, model_name)
results[model_name] = {
'prediction': self.emotion_labels[prediction],
'prediction_label': prediction,
'probabilities': probabilities
}
if show_details:
print(f"\n{'='*80}")
print(f"TEXT: {text}")
print(f"{'='*80}")
for model_name, result in results.items():
print(f"\n{model_name}:")
print(f" Predicted Emotion: {result['prediction']}")
if result['probabilities'] is not None:
print(f" Confidence Scores:")
# Print as a neat table with percentages
for emotion_name, prob in zip(self.emotion_labels.values(),
result['probabilities']):
print(f" {emotion_name:<15} {prob*100:6.2f}%")
return results
def predict_ensemble(self, text):
"""Predict using ensemble approach"""
cleaned_text = self.preprocessor.preprocess(text)
text_tfidf = self.vectorizer.transform([cleaned_text])
ensemble_pred = ensemble_model.predict(text_tfidf)[0]
# Get individual predictions
predictions = {}
for model_name in self.models.keys():
if model_name != 'Ensemble':
pred = self.models[model_name].predict(text_tfidf)[0]
predictions[model_name] = self.emotion_labels[pred]
return self.emotion_labels[ensemble_pred], predictions
# Initialize predictor
models_dict = {
'Naive Bayes': mnb_model,
'Logistic Regression': lr_model,
'SVM': svm_model
}
predictor = EmotionPredictor(preprocessor, tfidf_vectorizer, models_dict, emotion_labels)
In [103]:
custom_texts = [
"The sound outside my window makes me so nervous. I feel unsafe and scared.",
"I just won first place in the competition! I'm so thrilled and proud of myself.",
"Ever since you left, I feel broken inside. The pain of losing you never fades.",
"This is outrageous! I can't believe they treated me with such disrespect.",
"I opened the box and found a brand new phone inside. I was completely shocked!",
"You are my everything. I cherish you more than words could ever express."
]
prediction_results = []
for i, text in enumerate(custom_texts, 1):
print(f"CUSTOM PREDICTION #{i}")
results = predictor.predict_all_models(text, show_details=True)
# Get ensemble prediction
ensemble_pred, individual_preds = predictor.predict_ensemble(text)
print(f"\n ENSEMBLE VOTING:")
print(f" Individual Votes:")
for model, pred in individual_preds.items():
print(f" {model:.<25} {pred}")
print(f" Final Ensemble Prediction: {ensemble_pred}")
# Store for summary
prediction_results.append({
'Text': text[:40] + '...' if len(text) > 40 else text,
'Naive Bayes': results['Naive Bayes']['prediction'],
'Logistic Reg': results['Logistic Regression']['prediction'],
'SVM': results['SVM']['prediction'],
'Ensemble': ensemble_pred
})
CUSTOM PREDICTION #1
================================================================================
TEXT: The sound outside my window makes me so nervous. I feel unsafe and scared.
================================================================================
Naive Bayes:
Predicted Emotion: Fear
Confidence Scores:
Sadness 3.53%
Joy 5.24%
Love 4.64%
Anger 10.66%
Fear 67.94%
Surprise 8.00%
Logistic Regression:
Predicted Emotion: Fear
Confidence Scores:
Sadness 1.00%
Joy 1.54%
Love 1.22%
Anger 1.97%
Fear 93.15%
Surprise 1.11%
SVM:
Predicted Emotion: Fear
Confidence Scores:
Sadness 2.54%
Joy 2.86%
Love 5.99%
Anger 7.19%
Fear 75.49%
Surprise 5.93%
ENSEMBLE VOTING:
Individual Votes:
Naive Bayes.............. Fear
Logistic Regression...... Fear
SVM...................... Fear
Final Ensemble Prediction: Fear
CUSTOM PREDICTION #2
================================================================================
TEXT: I just won first place in the competition! I'm so thrilled and proud of myself.
================================================================================
Naive Bayes:
Predicted Emotion: Joy
Confidence Scores:
Sadness 11.72%
Joy 54.32%
Love 6.18%
Anger 7.56%
Fear 9.45%
Surprise 10.77%
Logistic Regression:
Predicted Emotion: Joy
Confidence Scores:
Sadness 4.04%
Joy 84.55%
Love 2.14%
Anger 3.67%
Fear 3.60%
Surprise 2.00%
SVM:
Predicted Emotion: Joy
Confidence Scores:
Sadness 5.63%
Joy 69.81%
Love 5.43%
Anger 7.58%
Fear 7.54%
Surprise 4.01%
ENSEMBLE VOTING:
Individual Votes:
Naive Bayes.............. Joy
Logistic Regression...... Joy
SVM...................... Joy
Final Ensemble Prediction: Joy
CUSTOM PREDICTION #3
================================================================================
TEXT: Ever since you left, I feel broken inside. The pain of losing you never fades.
================================================================================
Naive Bayes:
Predicted Emotion: Fear
Confidence Scores:
Sadness 22.78%
Joy 12.25%
Love 14.42%
Anger 12.69%
Fear 26.13%
Surprise 11.73%
Logistic Regression:
Predicted Emotion: Sadness
Confidence Scores:
Sadness 23.79%
Joy 16.77%
Love 13.97%
Anger 14.41%
Fear 21.07%
Surprise 9.99%
SVM:
Predicted Emotion: Fear
Confidence Scores:
Sadness 16.93%
Joy 16.96%
Love 17.13%
Anger 15.26%
Fear 19.40%
Surprise 14.32%
ENSEMBLE VOTING:
Individual Votes:
Naive Bayes.............. Fear
Logistic Regression...... Sadness
SVM...................... Fear
Final Ensemble Prediction: Fear
CUSTOM PREDICTION #4
================================================================================
TEXT: This is outrageous! I can't believe they treated me with such disrespect.
================================================================================
Naive Bayes:
Predicted Emotion: Anger
Confidence Scores:
Sadness 9.53%
Joy 13.03%
Love 21.44%
Anger 29.78%
Fear 11.69%
Surprise 14.53%
Logistic Regression:
Predicted Emotion: Anger
Confidence Scores:
Sadness 13.24%
Joy 15.84%
Love 16.72%
Anger 32.31%
Fear 15.12%
Surprise 6.78%
SVM:
Predicted Emotion: Anger
Confidence Scores:
Sadness 11.36%
Joy 6.26%
Love 21.61%
Anger 34.06%
Fear 16.10%
Surprise 10.61%
ENSEMBLE VOTING:
Individual Votes:
Naive Bayes.............. Anger
Logistic Regression...... Anger
SVM...................... Anger
Final Ensemble Prediction: Anger
CUSTOM PREDICTION #5
================================================================================
TEXT: I opened the box and found a brand new phone inside. I was completely shocked!
================================================================================
Naive Bayes:
Predicted Emotion: Surprise
Confidence Scores:
Sadness 7.47%
Joy 9.94%
Love 9.02%
Anger 7.68%
Fear 18.21%
Surprise 47.69%
Logistic Regression:
Predicted Emotion: Surprise
Confidence Scores:
Sadness 4.14%
Joy 4.97%
Love 3.62%
Anger 3.48%
Fear 8.04%
Surprise 75.74%
SVM:
Predicted Emotion: Surprise
Confidence Scores:
Sadness 5.48%
Joy 7.34%
Love 8.74%
Anger 5.34%
Fear 14.28%
Surprise 58.83%
ENSEMBLE VOTING:
Individual Votes:
Naive Bayes.............. Surprise
Logistic Regression...... Surprise
SVM...................... Surprise
Final Ensemble Prediction: Surprise
CUSTOM PREDICTION #6
================================================================================
TEXT: You are my everything. I cherish you more than words could ever express.
================================================================================
Naive Bayes:
Predicted Emotion: Love
Confidence Scores:
Sadness 12.37%
Joy 15.21%
Love 23.46%
Anger 16.31%
Fear 15.88%
Surprise 16.77%
Logistic Regression:
Predicted Emotion: Joy
Confidence Scores:
Sadness 19.43%
Joy 24.40%
Love 16.76%
Anger 12.71%
Fear 18.40%
Surprise 8.31%
SVM:
Predicted Emotion: Sadness
Confidence Scores:
Sadness 18.78%
Joy 18.51%
Love 18.69%
Anger 13.50%
Fear 17.69%
Surprise 12.83%
ENSEMBLE VOTING:
Individual Votes:
Naive Bayes.............. Love
Logistic Regression...... Joy
SVM...................... Sadness
Final Ensemble Prediction: Sadness
In [104]:
# Summary table
print("CUSTOM PREDICTIONS SUMMARY")
prediction_results_df = pd.DataFrame(prediction_results)
print(prediction_results_df.to_string(index=False))
CUSTOM PREDICTIONS SUMMARY
Text Naive Bayes Logistic Reg SVM Ensemble
The sound outside my window makes me so ... Fear Fear Fear Fear
I just won first place in the competitio... Joy Joy Joy Joy
Ever since you left, I feel broken insid... Fear Sadness Fear Fear
This is outrageous! I can't believe they... Anger Anger Anger Anger
I opened the box and found a brand new p... Surprise Surprise Surprise Surprise
You are my everything. I cherish you mor... Love Joy Sadness Sadness
In [105]:
# Visualize custom predictions
fig, ax = plt.subplots(figsize=(14, 8))
# Prepare data for grouped bar chart
x = np.arange(len(custom_texts))
width = 0.2
models_list = ['Naive Bayes', 'Logistic Reg', 'SVM', 'Ensemble']
colors_custom = ['skyblue', 'lightgreen', 'salmon', 'plum']
for idx, (model, color) in enumerate(zip(models_list, colors_custom)):
# Convert emotions to numeric for visualization
emotion_to_num = {v: k for k, v in emotion_labels.items()}
values = [emotion_to_num[row[model]] for _, row in prediction_results_df.iterrows()]
offset = width * (idx - 1.5)
ax.bar(x + offset, values, width, label=model, color=color, alpha=0.8)
ax.set_xlabel('Custom Text Samples', fontsize=12, fontweight='bold')
ax.set_ylabel('Predicted Emotion (Numeric)', fontsize=12, fontweight='bold')
ax.set_title('Custom Predictions Comparison Across All Models', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels([f'Text {i+1}' for i in range(len(custom_texts))])
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
# Add emotion labels on y-axis
emotion_ticks = list(emotion_labels.keys())
emotion_names = list(emotion_labels.values())
ax.set_yticks(emotion_ticks)
ax.set_yticklabels(emotion_names)
plt.tight_layout()
plt.savefig('custom_predictions_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
Agreement analysis for custom predictions¶
In [106]:
# Agreement analysis for custom predictions
print("\n MODEL AGREEMENT ON CUSTOM PREDICTIONS:")
print("-" * 80)
for i, row in prediction_results_df.iterrows():
predictions_set = set([row['Naive Bayes'], row['Logistic Reg'],
row['SVM'], row['Ensemble']])
if len(predictions_set) == 1:
agreement = "All models agree "
elif len(predictions_set) == 2:
agreement = "Partial agreement"
else:
agreement = "Models disagree"
print(f"Text {i+1}: {agreement}")
print(f" Predictions: NB={row['Naive Bayes']}, LR={row['Logistic Reg']}, "
f"SVM={row['SVM']}, Ensemble={row['Ensemble']}")
MODEL AGREEMENT ON CUSTOM PREDICTIONS: -------------------------------------------------------------------------------- Text 1: All models agree Predictions: NB=Fear, LR=Fear, SVM=Fear, Ensemble=Fear Text 2: All models agree Predictions: NB=Joy, LR=Joy, SVM=Joy, Ensemble=Joy Text 3: Partial agreement Predictions: NB=Fear, LR=Sadness, SVM=Fear, Ensemble=Fear Text 4: All models agree Predictions: NB=Anger, LR=Anger, SVM=Anger, Ensemble=Anger Text 5: All models agree Predictions: NB=Surprise, LR=Surprise, SVM=Surprise, Ensemble=Surprise Text 6: Models disagree Predictions: NB=Love, LR=Joy, SVM=Sadness, Ensemble=Sadness
In [107]:
from sklearn.model_selection import (
GridSearchCV, RandomizedSearchCV, cross_val_score,
StratifiedKFold, learning_curve
)
from sklearn.metrics import make_scorer, f1_score
SETUP CROSS-VALIDATION STRATEGY¶
In [108]:
print("\n CROSS-VALIDATION SETUP:")
# Use Stratified K-Fold to maintain class distribution
cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
print(" Using Stratified 5-Fold Cross-Validation")
print(" - Maintains class distribution in each fold")
print(" - Reduces variance in performance estimates")
print(" - More reliable than simple train-test split")
CROSS-VALIDATION SETUP: Using Stratified 5-Fold Cross-Validation - Maintains class distribution in each fold - Reduces variance in performance estimates - More reliable than simple train-test split
BASELINE CROSS-VALIDATION SCORES¶
In [109]:
print("\n" + "=" * 80)
print("BASELINE MODEL PERFORMANCE (Before Tuning)")
print("=" * 80)
def evaluate_model_cv(model, X, y, cv, model_name):
"""Evaluate model using cross-validation"""
# Accuracy
accuracy_scores = cross_val_score(model, X, y, cv=cv,
scoring='accuracy', n_jobs=-1)
# F1-Score (macro)
f1_macro_scores = cross_val_score(model, X, y, cv=cv,
scoring='f1_macro', n_jobs=-1)
# F1-Score (weighted)
f1_weighted_scores = cross_val_score(model, X, y, cv=cv,
scoring='f1_weighted', n_jobs=-1)
# Precision (macro)
precision_scores = cross_val_score(model, X, y, cv=cv,
scoring='precision_macro', n_jobs=-1)
# Recall (macro)
recall_scores = cross_val_score(model, X, y, cv=cv,
scoring='recall_macro', n_jobs=-1)
results = {
'Model': model_name,
'Accuracy_Mean': accuracy_scores.mean(),
'Accuracy_Std': accuracy_scores.std(),
'F1_Macro_Mean': f1_macro_scores.mean(),
'F1_Macro_Std': f1_macro_scores.std(),
'F1_Weighted_Mean': f1_weighted_scores.mean(),
'F1_Weighted_Std': f1_weighted_scores.std(),
'Precision_Mean': precision_scores.mean(),
'Precision_Std': precision_scores.std(),
'Recall_Mean': recall_scores.mean(),
'Recall_Std': recall_scores.std()
}
return results
# Evaluate baseline models
print("\n Evaluating baseline models with 5-fold CV...")
baseline_results = []
# Naive Bayes
print("\n Evaluating Naive Bayes...")
mnb_cv_results = evaluate_model_cv(mnb_model, X_train_tfidf, y_train,
cv_strategy, 'Naive Bayes (Baseline)')
baseline_results.append(mnb_cv_results)
# Logistic Regression
print(" Evaluating Logistic Regression...")
lr_cv_results = evaluate_model_cv(lr_model, X_train_tfidf, y_train,
cv_strategy, 'Logistic Regression (Baseline)')
baseline_results.append(lr_cv_results)
# SVM
print(" Evaluating SVM...")
svm_cv_results = evaluate_model_cv(svm_model, X_train_tfidf, y_train,
cv_strategy, 'SVM (Baseline)')
baseline_results.append(svm_cv_results)
# Display baseline results
baseline_df = pd.DataFrame(baseline_results)
print("\n BASELINE CROSS-VALIDATION RESULTS:")
print("=" * 80)
print(f"\n{'Model':<35} {'Accuracy':<20} {'F1-Macro':<20}")
print("-" * 80)
for _, row in baseline_df.iterrows():
print(f"{row['Model']:<35} {row['Accuracy_Mean']:.4f} ± {row['Accuracy_Std']:.4f} "
f"{row['F1_Macro_Mean']:.4f} ± {row['F1_Macro_Std']:.4f}")
================================================================================ BASELINE MODEL PERFORMANCE (Before Tuning) ================================================================================ Evaluating baseline models with 5-fold CV... Evaluating Naive Bayes... Evaluating Logistic Regression... Evaluating SVM... BASELINE CROSS-VALIDATION RESULTS: ================================================================================ Model Accuracy F1-Macro -------------------------------------------------------------------------------- Naive Bayes (Baseline) 0.8736 ± 0.0035 0.8731 ± 0.0035 Logistic Regression (Baseline) 0.9109 ± 0.0030 0.9104 ± 0.0031 SVM (Baseline) 0.9124 ± 0.0035 0.9121 ± 0.0036
HYPERPARAMETER TUNING - NAIVE BAYES¶
In [110]:
# HYPERPARAMETER TUNING - NAIVE BAYES
print("\n" + "=" * 80)
print("HYPERPARAMETER TUNING - MULTINOMIAL NAIVE BAYES")
print("=" * 80)
print("\n Testing different alpha values (smoothing parameter)...")
# Parameter grid for Naive Bayes
mnb_param_grid = {
'alpha': [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
}
# Grid Search with CV
mnb_grid_search = GridSearchCV(
MultinomialNB(),
param_grid=mnb_param_grid,
cv=cv_strategy,
scoring='f1_macro',
n_jobs=-1,
verbose=1
)
mnb_grid_search.fit(X_train_tfidf, y_train)
print(f"\n Best Parameters: {mnb_grid_search.best_params_}")
print(f" Best CV F1-Score: {mnb_grid_search.best_score_:.4f}")
# Train best model
best_mnb = mnb_grid_search.best_estimator_
best_mnb_pred = best_mnb.predict(X_test_tfidf)
best_mnb_acc = accuracy_score(y_test, best_mnb_pred)
best_mnb_f1 = f1_score(y_test, best_mnb_pred, average='macro')
print(f"\n TEST SET PERFORMANCE:")
print(f" Accuracy: {best_mnb_acc:.4f} ({best_mnb_acc*100:.2f}%)")
print(f" F1-Score: {best_mnb_f1:.4f}")
print(f" Improvement over baseline: {(best_mnb_f1 - mnb_cv_results['F1_Macro_Mean'])*100:+.2f}%")
================================================================================
HYPERPARAMETER TUNING - MULTINOMIAL NAIVE BAYES
================================================================================
Testing different alpha values (smoothing parameter)...
Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best Parameters: {'alpha': 2.0}
Best CV F1-Score: 0.8809
TEST SET PERFORMANCE:
Accuracy: 0.8905 (89.05%)
F1-Score: 0.8897
Improvement over baseline: +1.66%
In [111]:
# Visualize hyperparameter search
mnb_results_df = pd.DataFrame(mnb_grid_search.cv_results_)
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(mnb_param_grid['alpha'], mnb_results_df['mean_test_score'],
'o-', linewidth=2, markersize=8, label='Mean CV Score')
ax.fill_between(mnb_param_grid['alpha'],
mnb_results_df['mean_test_score'] - mnb_results_df['std_test_score'],
mnb_results_df['mean_test_score'] + mnb_results_df['std_test_score'],
alpha=0.3)
ax.set_xlabel('Alpha (Smoothing Parameter)', fontsize=12, fontweight='bold')
ax.set_ylabel('F1-Score (Macro)', fontsize=12, fontweight='bold')
ax.set_title('Naive Bayes - Hyperparameter Tuning Results', fontsize=14, fontweight='bold')
ax.set_xscale('log')
ax.grid(True, alpha=0.3)
ax.legend()
plt.tight_layout()
plt.savefig('mnb_hyperparameter_tuning.png', dpi=300, bbox_inches='tight')
plt.show()
HYPERPARAMETER TUNING - LOGISTIC REGRESSION¶
In [112]:
print("\n" + "=" * 80)
print("HYPERPARAMETER TUNING - LOGISTIC REGRESSION")
print("=" * 80)
print("\n Testing different C and solver combinations...")
# Parameter grid for Logistic Regression
lr_param_grid = {
'C': [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 50.0, 100.0],
'solver': ['lbfgs', 'liblinear', 'saga'],
'max_iter': [1000, 2000]
}
# Use RandomizedSearchCV for faster search
lr_random_search = RandomizedSearchCV(
LogisticRegression( random_state=42),
param_distributions=lr_param_grid,
n_iter=30, # Test 30 random combinations
cv=cv_strategy,
scoring='f1_macro',
n_jobs=-1,
verbose=1,
random_state=42
)
lr_random_search.fit(X_train_tfidf, y_train)
print(f"\n Best Parameters: {lr_random_search.best_params_}")
print(f" Best CV F1-Score: {lr_random_search.best_score_:.4f}")
# Train best model
best_lr = lr_random_search.best_estimator_
best_lr_pred = best_lr.predict(X_test_tfidf)
best_lr_acc = accuracy_score(y_test, best_lr_pred)
best_lr_f1 = f1_score(y_test, best_lr_pred, average='macro')
print(f"\n TEST SET PERFORMANCE:")
print(f" Accuracy: {best_lr_acc:.4f} ({best_lr_acc*100:.2f}%)")
print(f" F1-Score: {best_lr_f1:.4f}")
print(f" Improvement over baseline: {(best_lr_f1 - lr_cv_results['F1_Macro_Mean'])*100:+.2f}%")
================================================================================
HYPERPARAMETER TUNING - LOGISTIC REGRESSION
================================================================================
Testing different C and solver combinations...
Fitting 5 folds for each of 30 candidates, totalling 150 fits
Best Parameters: {'solver': 'liblinear', 'max_iter': 1000, 'C': 2.0}
Best CV F1-Score: 0.9148
TEST SET PERFORMANCE:
Accuracy: 0.9200 (92.00%)
F1-Score: 0.9195
Improvement over baseline: +0.90%
In [113]:
# Visualize top parameters
lr_results_df = pd.DataFrame(lr_random_search.cv_results_)
top_10 = lr_results_df.nlargest(10, 'mean_test_score')
fig, ax = plt.subplots(figsize=(12, 6))
x_pos = np.arange(len(top_10))
ax.bar(x_pos, top_10['mean_test_score'], yerr=top_10['std_test_score'],
alpha=0.8, capsize=5, edgecolor='black')
ax.set_xlabel('Parameter Combinations (Top 10)', fontsize=12, fontweight='bold')
ax.set_ylabel('F1-Score (Macro)', fontsize=12, fontweight='bold')
ax.set_title('Logistic Regression - Top 10 Hyperparameter Combinations',
fontsize=14, fontweight='bold')
ax.set_xticks(x_pos)
ax.set_xticklabels([f'#{i+1}' for i in range(len(top_10))], rotation=0)
ax.grid(True, alpha=0.3, axis='y')
# Add value labels
for i, v in enumerate(top_10['mean_test_score']):
ax.text(i, v + 0.005, f'{v:.4f}', ha='center', va='bottom',
fontweight='bold', fontsize=9)
plt.tight_layout()
plt.savefig('lr_hyperparameter_tuning.png', dpi=300, bbox_inches='tight')
plt.show()
HYPERPARAMETER TUNING - SVM¶
In [114]:
print("\n" + "=" * 80)
print("HYPERPARAMETER TUNING - SUPPORT VECTOR MACHINE")
print("=" * 80)
print("\n Testing different C values and loss functions...")
# Parameter grid for SVM
svm_param_grid = {
'C': [0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 50.0],
'loss': ['hinge', 'squared_hinge'],
'max_iter': [2000, 3000]
}
svm_random_search = RandomizedSearchCV(
LinearSVC(dual=False, random_state=42),
param_distributions=svm_param_grid,
n_iter=20,
cv=cv_strategy,
scoring='f1_macro',
n_jobs=-1,
verbose=1,
random_state=42
)
svm_random_search.fit(X_train_tfidf, y_train)
print(f"\n Best Parameters: {svm_random_search.best_params_}")
print(f" Best CV F1-Score: {svm_random_search.best_score_:.4f}")
# Train best model
best_svm = svm_random_search.best_estimator_
best_svm_pred = best_svm.predict(X_test_tfidf)
best_svm_acc = accuracy_score(y_test, best_svm_pred)
best_svm_f1 = f1_score(y_test, best_svm_pred, average='macro')
print(f"\n TEST SET PERFORMANCE:")
print(f" Accuracy: {best_svm_acc:.4f} ({best_svm_acc*100:.2f}%)")
print(f" F1-Score: {best_svm_f1:.4f}")
print(f" Improvement over baseline: {(best_svm_f1 - svm_cv_results['F1_Macro_Mean'])*100:+.2f}%")
================================================================================
HYPERPARAMETER TUNING - SUPPORT VECTOR MACHINE
================================================================================
Testing different C values and loss functions...
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Parameters: {'max_iter': 3000, 'loss': 'squared_hinge', 'C': 0.5}
Best CV F1-Score: 0.9169
TEST SET PERFORMANCE:
Accuracy: 0.9190 (91.90%)
F1-Score: 0.9185
Improvement over baseline: +0.64%
In [115]:
# Visualize C parameter effect
svm_results_df = pd.DataFrame(svm_random_search.cv_results_)
# Group by C value
c_values = []
c_scores = []
c_stds = []
for c in svm_param_grid['C']:
mask = svm_results_df['param_C'] == c
if mask.any():
c_values.append(c)
c_scores.append(svm_results_df[mask]['mean_test_score'].max())
c_stds.append(svm_results_df[mask]['std_test_score'].mean())
fig, ax = plt.subplots(figsize=(12, 6))
ax.errorbar(c_values, c_scores, yerr=c_stds, fmt='o-', linewidth=2,
markersize=8, capsize=5, label='Best score for each C')
ax.set_xlabel('C (Regularization Parameter)', fontsize=12, fontweight='bold')
ax.set_ylabel('F1-Score (Macro)', fontsize=12, fontweight='bold')
ax.set_title('SVM - Effect of Regularization Parameter C', fontsize=14, fontweight='bold')
ax.set_xscale('log')
ax.grid(True, alpha=0.3)
ax.legend()
plt.tight_layout()
plt.savefig('svm_hyperparameter_tuning.png', dpi=300, bbox_inches='tight')
plt.show()
WHY MODELS CONFUSE SIMILAR EMOTIONS¶
In [116]:
print("\n" + "=" * 80)
print("ANALYSIS: WHY FEAR & SADNESS ARE CONFUSED")
print("=" * 80)
print("""
ROOT CAUSES OF EMOTION CONFUSION:
1. SEMANTIC OVERLAP:
- Fear and Sadness share negative sentiment
- Both express distress and discomfort
- Similar vocabulary: "worried", "upset", "bad", "terrible"
2. CONTEXTUAL AMBIGUITY:
- "I'm worried about losing you" → Fear or Sadness?
- "This is terrible" → Anger, Fear, or Sadness?
- "I don't know what to do" → Fear, Sadness, or Surprise?
3. TF-IDF LIMITATIONS:
- Doesn't capture word order or context
- "not happy" vs "happy" treated independently
- Misses subtle emotional nuances
4. CLASS BOUNDARY OVERLAP:
- Emotions aren't discrete categories
- People can feel multiple emotions simultaneously
- Annotation subjectivity in training data
5. LIMITED FEATURES:
- Bag-of-words loses sequential information
- No consideration of:
* Syntax and grammar
* Negations and intensifiers
* Contextual relationships
""")
================================================================================
ANALYSIS: WHY FEAR & SADNESS ARE CONFUSED
================================================================================
ROOT CAUSES OF EMOTION CONFUSION:
1. SEMANTIC OVERLAP:
- Fear and Sadness share negative sentiment
- Both express distress and discomfort
- Similar vocabulary: "worried", "upset", "bad", "terrible"
2. CONTEXTUAL AMBIGUITY:
- "I'm worried about losing you" → Fear or Sadness?
- "This is terrible" → Anger, Fear, or Sadness?
- "I don't know what to do" → Fear, Sadness, or Surprise?
3. TF-IDF LIMITATIONS:
- Doesn't capture word order or context
- "not happy" vs "happy" treated independently
- Misses subtle emotional nuances
4. CLASS BOUNDARY OVERLAP:
- Emotions aren't discrete categories
- People can feel multiple emotions simultaneously
- Annotation subjectivity in training data
5. LIMITED FEATURES:
- Bag-of-words loses sequential information
- No consideration of:
* Syntax and grammar
* Negations and intensifiers
* Contextual relationships
DETAILED CONFUSION ANALYSIS¶
In [117]:
def analyze_confusion_pairs(y_true, y_pred, emotion_labels):
"""Analyze which emotion pairs are most confused"""
cm = confusion_matrix(y_true, y_pred)
confusion_pairs = []
for i in range(len(emotion_labels)):
for j in range(len(emotion_labels)):
if i != j: # Off-diagonal elements
count = cm[i][j]
if count > 0:
confusion_pairs.append({
'True_Emotion': emotion_labels[i],
'Predicted_As': emotion_labels[j],
'Count': count,
'Percentage': count / cm[i].sum() * 100
})
# Sort by count
confusion_df = pd.DataFrame(confusion_pairs)
confusion_df = confusion_df.sort_values('Count', ascending=False)
return confusion_df, cm
# Analyze best model (tuned Logistic Regression)
confusion_df, cm = analyze_confusion_pairs(y_test, best_lr_pred, emotion_labels)
print("\n TOP 20 MOST COMMON CONFUSION PAIRS:")
print("=" * 50)
print(confusion_df.head(20).to_string(index=False))
TOP 20 MOST COMMON CONFUSION PAIRS:
==================================================
True_Emotion Predicted_As Count Percentage
Fear Surprise 29 8.682635
Joy Love 28 8.383234
Anger Fear 16 4.804805
Sadness Fear 9 2.702703
Anger Sadness 8 2.402402
Fear Anger 8 2.395210
Anger Joy 7 2.102102
Sadness Anger 6 1.801802
Fear Sadness 5 1.497006
Surprise Fear 5 1.501502
Joy Surprise 5 1.497006
Joy Sadness 5 1.497006
Sadness Joy 5 1.501502
Joy Fear 5 1.497006
Joy Anger 4 1.197605
Fear Joy 3 0.898204
Sadness Surprise 2 0.600601
Sadness Love 2 0.600601
Love Sadness 2 0.600601
Love Joy 2 0.600601
In [118]:
# Visualize confusion pairs
fig, ax = plt.subplots(figsize=(14, 8))
top_15 = confusion_df.head(15)
pairs = [f"{row['True_Emotion']} → {row['Predicted_As']}"
for _, row in top_15.iterrows()]
colors = plt.cm.RdYlGn_r(top_15['Percentage'] / top_15['Percentage'].max())
bars = ax.barh(range(len(top_15)), top_15['Count'], color=colors, edgecolor='black')
ax.set_yticks(range(len(top_15)))
ax.set_yticklabels(pairs)
ax.set_xlabel('Number of Confusions', fontsize=12, fontweight='bold')
ax.set_title('Top 15 Emotion Confusion Pairs', fontsize=14, fontweight='bold')
ax.invert_yaxis()
ax.grid(True, alpha=0.3, axis='x')
# Add percentage labels
for i, (bar, row) in enumerate(zip(bars, top_15.itertuples())):
width = bar.get_width()
ax.text(width + 1, bar.get_y() + bar.get_height()/2,
f'{row.Count} ({row.Percentage:.1f}%)',
ha='left', va='center', fontweight='bold', fontsize=9)
plt.tight_layout()
plt.savefig('confusion_pairs_analysis.png', dpi=300, bbox_inches='tight')
plt.show()
ANALYZE CONFUSED SAMPLES - FEAR vs SADNESS¶
In [119]:
print("\n" + "=" * 80)
print("SAMPLE ANALYSIS: FEAR CONFUSED AS SADNESS")
print("=" * 80)
# Find Fear samples predicted as Sadness
fear_as_sadness_indices = np.where((y_test == 4) & (best_lr_pred == 0))[0]
print(f"\nTotal Fear samples confused as Sadness: {len(fear_as_sadness_indices)}")
print("\nSample Examples:")
print("-" * 80)
# Show examples
for i, idx in enumerate(fear_as_sadness_indices[:10], 1):
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
print(f"\n{i}. {text}")
print(f" True: Fear | Predicted: Sadness")
print("\n" + "=" * 80)
print("SAMPLE ANALYSIS: SADNESS CONFUSED AS FEAR")
print("=" * 80)
# Find Sadness samples predicted as Fear
sadness_as_fear_indices = np.where((y_test == 0) & (best_lr_pred == 4))[0]
print(f"\nTotal Sadness samples confused as Fear: {len(sadness_as_fear_indices)}")
print("\nSample Examples:")
print("-" * 80)
for i, idx in enumerate(sadness_as_fear_indices[:10], 1):
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
print(f"\n{i}. {text}")
print(f" True: Sadness | Predicted: Fear")
================================================================================ SAMPLE ANALYSIS: FEAR CONFUSED AS SADNESS ================================================================================ Total Fear samples confused as Sadness: 5 Sample Examples: -------------------------------------------------------------------------------- 1. i get stressed or discouraged because of a few bad naps or i feel confused as to what hunter wants but those moments aren t quite as common anymore True: Fear | Predicted: Sadness 2. as a child ie hiking in reasonably rugged bush and becoming lost for a duration of hours True: Fear | Predicted: Sadness 3. i have found some people feel inhibited and perhaps embarrassed to dance but there are many ways to move True: Fear | Predicted: Sadness 4. i feel absolutely no concern whatsoever that fully unprotected sex will have absolutely zero consequences True: Fear | Predicted: Sadness 5. i cant help but feel helpless n disappointed True: Fear | Predicted: Sadness ================================================================================ SAMPLE ANALYSIS: SADNESS CONFUSED AS FEAR ================================================================================ Total Sadness samples confused as Fear: 9 Sample Examples: -------------------------------------------------------------------------------- 1. i won t lie sometimes i feel helpless when i m trying to decipher a new script or something that i m completely unfamiliar with True: Sadness | Predicted: Fear 2. i feel quite helpless True: Sadness | Predicted: Fear 3. i often feel inhibited when it comes to being with too many people esp when the people who make the decisions is the one who is the most assertive and influential and not because he she is the most reasonable nor because he she is in the best position to do so True: Sadness | Predicted: Fear 4. i seriously feel like i am being verbally assaulted by people just for working at a gas station and being the first one in the uniform that they can yell at True: Sadness | Predicted: Fear 5. i feel like my adrenals are being assaulted True: Sadness | Predicted: Fear 6. i know they arent too terribly picky and so therefore i feel less inhibited True: Sadness | Predicted: Fear 7. i can t help but feel a little disadvantaged in just a fighter but i ve got maneuverability on my side True: Sadness | Predicted: Fear 8. i cant control wall street or corporate fraud or even how my own elected officials conduct themselves i feel helpless and frightened for my own financial future so im going to find some way to control something True: Sadness | Predicted: Fear 9. i often feel inhibited by everything and so i shut down True: Sadness | Predicted: Fear
WORD ANALYSIS - CONFUSED EMOTIONS¶
In [120]:
print("\n" + "=" * 80)
print("VOCABULARY ANALYSIS OF CONFUSED EMOTIONS")
print("=" * 80)
def get_words_from_confused_samples(test_df, y_test, y_pred,
true_label, pred_label, preprocessor):
"""Extract words from confused samples"""
confused_indices = np.where((y_test == true_label) & (y_pred == pred_label))[0]
all_words = []
for idx in confused_indices:
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
cleaned = preprocessor.preprocess(text)
words = cleaned.split()
all_words.extend(words)
return Counter(all_words)
# Get words from Fear confused as Sadness
fear_as_sadness_words = get_words_from_confused_samples(
test_df, y_test, best_lr_pred, 4, 0, preprocessor
)
# Get words from Sadness confused as Fear
sadness_as_fear_words = get_words_from_confused_samples(
test_df, y_test, best_lr_pred, 0, 4, preprocessor
)
# Get words from correctly classified Fear
fear_correct_indices = np.where((y_test == 4) & (best_lr_pred == 4))[0]
fear_correct_words = []
for idx in fear_correct_indices:
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
cleaned = preprocessor.preprocess(text)
fear_correct_words.extend(cleaned.split())
fear_correct_words = Counter(fear_correct_words)
# Get words from correctly classified Sadness
sadness_correct_indices = np.where((y_test == 0) & (best_lr_pred == 0))[0]
sadness_correct_words = []
for idx in sadness_correct_indices:
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
cleaned = preprocessor.preprocess(text)
sadness_correct_words.extend(cleaned.split())
sadness_correct_words = Counter(sadness_correct_words)
print("\n TOP WORDS IN FEAR→SADNESS CONFUSIONS:")
print("-" * 80)
for word, count in fear_as_sadness_words.most_common(20):
print(f" {word:.<25} {count:>4}")
print("\n TOP WORDS IN SADNESS→FEAR CONFUSIONS:")
print("-" * 80)
for word, count in sadness_as_fear_words.most_common(20):
print(f" {word:.<25} {count:>4}")
================================================================================ VOCABULARY ANALYSIS OF CONFUSED EMOTIONS ================================================================================ TOP WORDS IN FEAR→SADNESS CONFUSIONS: -------------------------------------------------------------------------------- feel..................... 4 absolutely............... 2 get...................... 1 stressed................. 1 discouraged.............. 1 bad...................... 1 nap...................... 1 confused................. 1 hunter................... 1 want..................... 1 moment................... 1 quite.................... 1 common................... 1 anymore.................. 1 child.................... 1 ie....................... 1 hiking................... 1 reasonably............... 1 rugged................... 1 bush..................... 1 TOP WORDS IN SADNESS→FEAR CONFUSIONS: -------------------------------------------------------------------------------- feel..................... 9 helpless................. 3 inhibited................ 3 people................... 3 something................ 2 often.................... 2 one...................... 2 like..................... 2 assaulted................ 2 control.................. 2 lie...................... 1 sometimes................ 1 trying................... 1 decipher................. 1 new...................... 1 script................... 1 completely............... 1 unfamiliar............... 1 quite.................... 1 come..................... 1
In [121]:
# Visualize overlapping words
fig, axes = plt.subplots(1, 2, figsize=(18, 7))
# Fear words
fear_confused = fear_as_sadness_words.most_common(15)
fear_correct = [(w, fear_correct_words[w]) for w, _ in fear_confused]
x = np.arange(len(fear_confused))
width = 0.35
words_fear = [w for w, _ in fear_confused]
confused_counts = [c for _, c in fear_confused]
correct_counts = [c for _, c in fear_correct]
bars1 = axes[0].bar(x - width/2, confused_counts, width,
label='Fear→Sadness (Confused)', alpha=0.8, color='salmon')
bars2 = axes[0].bar(x + width/2, correct_counts, width,
label='Fear→Fear (Correct)', alpha=0.8, color='lightgreen')
axes[0].set_xlabel('Words', fontsize=12, fontweight='bold')
axes[0].set_ylabel('Frequency', fontsize=12, fontweight='bold')
axes[0].set_title('Word Frequency: Fear Confused vs Correct',
fontsize=14, fontweight='bold')
axes[0].set_xticks(x)
axes[0].set_xticklabels(words_fear, rotation=45, ha='right')
axes[0].legend()
axes[0].grid(True, alpha=0.3, axis='y')
# Sadness words
sadness_confused = sadness_as_fear_words.most_common(15)
sadness_correct = [(w, sadness_correct_words[w]) for w, _ in sadness_confused]
words_sadness = [w for w, _ in sadness_confused]
confused_counts_s = [c for _, c in sadness_confused]
correct_counts_s = [c for _, c in sadness_correct]
bars3 = axes[1].bar(x - width/2, confused_counts_s, width,
label='Sadness→Fear (Confused)', alpha=0.8, color='salmon')
bars4 = axes[1].bar(x + width/2, correct_counts_s, width,
label='Sadness→Sadness (Correct)', alpha=0.8, color='lightgreen')
axes[1].set_xlabel('Words', fontsize=12, fontweight='bold')
axes[1].set_ylabel('Frequency', fontsize=12, fontweight='bold')
axes[1].set_title('Word Frequency: Sadness Confused vs Correct',
fontsize=14, fontweight='bold')
axes[1].set_xticks(x)
axes[1].set_xticklabels(words_sadness, rotation=45, ha='right')
axes[1].legend()
axes[1].grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('confused_vs_correct_words.png', dpi=300, bbox_inches='tight')
plt.show()
IDENTIFY AMBIGUOUS WORDS¶
In [122]:
print("\n" + "=" * 80)
print("IDENTIFYING AMBIGUOUS WORDS (CAUSING CONFUSION)")
print("=" * 80)
# Find words that appear frequently in both Fear and Sadness
fear_words_set = set([w for w, _ in fear_correct_words.most_common(100)])
sadness_words_set = set([w for w, _ in sadness_correct_words.most_common(100)])
ambiguous_words = fear_words_set.intersection(sadness_words_set)
print(f"\n Found {len(ambiguous_words)} ambiguous words appearing in both emotions")
print("\nMost frequent ambiguous words:")
print("-" * 80)
ambiguous_freq = []
for word in ambiguous_words:
total_freq = fear_correct_words[word] + sadness_correct_words[word]
ambiguous_freq.append((word, fear_correct_words[word],
sadness_correct_words[word], total_freq))
ambiguous_freq.sort(key=lambda x: x[3], reverse=True)
print(f"\n{'Word':<20} {'Fear Freq':<15} {'Sadness Freq':<15} {'Total':<10}")
print("-" * 80)
for word, fear_f, sad_f, total in ambiguous_freq[:30]:
print(f"{word:<20} {fear_f:<15} {sad_f:<15} {total:<10}")
================================================================================ IDENTIFYING AMBIGUOUS WORDS (CAUSING CONFUSION) ================================================================================ Found 46 ambiguous words appearing in both emotions Most frequent ambiguous words: -------------------------------------------------------------------------------- Word Fear Freq Sadness Freq Total -------------------------------------------------------------------------------- feel 184 226 410 feeling 119 116 235 like 37 49 86 im 47 33 80 not 37 42 79 know 23 16 39 time 21 17 38 really 19 18 37 think 19 15 34 still 17 16 33 ive 17 15 32 little 22 9 31 dont 14 16 30 make 13 14 27 thing 10 15 25 bit 16 9 25 get 6 18 24 day 12 12 24 life 8 13 21 would 12 8 20 way 13 7 20 no 8 11 19 thought 5 14 19 even 8 11 19 never 11 7 18 around 9 9 18 one 9 8 17 sad 5 11 16 something 9 7 16 friend 7 8 15
In [123]:
# Visualize ambiguous words
fig, ax = plt.subplots(figsize=(14, 8))
top_ambiguous = ambiguous_freq[:20]
words_amb = [w for w, _, _, _ in top_ambiguous]
fear_freq = [f for _, f, _, _ in top_ambiguous]
sad_freq = [s for _, _, s, _ in top_ambiguous]
x = np.arange(len(top_ambiguous))
width = 0.35
bars1 = ax.bar(x - width/2, fear_freq, width, label='Fear', alpha=0.8, color='#ff6b6b')
bars2 = ax.bar(x + width/2, sad_freq, width, label='Sadness', alpha=0.8, color='#4ecdc4')
ax.set_xlabel('Ambiguous Words', fontsize=12, fontweight='bold')
ax.set_ylabel('Frequency', fontsize=12, fontweight='bold')
ax.set_title('Top 20 Ambiguous Words (Appear in Both Fear & Sadness)',
fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(words_amb, rotation=45, ha='right')
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('ambiguous_words_analysis.png', dpi=300, bbox_inches='tight')
plt.show()
In [124]:
print("\n KEY INSIGHTS:")
print("""
1. Words like 'feel', 'want', 'know' appear in both emotions
2. These are common words that lack emotional specificity
3. Context is crucial for disambiguation
4. TF-IDF alone cannot distinguish these cases
5. Need sequential models (LSTM, BERT) for better context understanding
""")
KEY INSIGHTS: 1. Words like 'feel', 'want', 'know' appear in both emotions 2. These are common words that lack emotional specificity 3. Context is crucial for disambiguation 4. TF-IDF alone cannot distinguish these cases 5. Need sequential models (LSTM, BERT) for better context understanding
IMPROVED TF-IDF WITH BETTER PARAMETERS¶
In [125]:
print("\n" + "=" * 80)
print("OPTIMIZED TF-IDF FEATURES")
print("=" * 80)
print("\n Testing different TF-IDF configurations...")
tfidf_configs = [
{'name': 'Baseline', 'params': {'max_features': 5000, 'ngram_range': (1, 2)}},
{'name': 'More Features', 'params': {'max_features': 10000, 'ngram_range': (1, 2)}},
{'name': 'Trigrams', 'params': {'max_features': 5000, 'ngram_range': (1, 3)}},
{'name': 'Char N-grams', 'params': {'max_features': 5000, 'ngram_range': (1, 2),
'analyzer': 'char', 'ngram_range': (3, 5)}},
{'name': 'Combined', 'params': {'max_features': 10000, 'ngram_range': (1, 3),
'min_df': 3, 'max_df': 0.7}},
]
tfidf_results = []
for config in tfidf_configs:
print(f"\n Testing: {config['name']}")
# Create vectorizer
if 'analyzer' in config['params']:
vectorizer = TfidfVectorizer(**config['params'], sublinear_tf=True)
else:
vectorizer = TfidfVectorizer(**config['params'], sublinear_tf=True)
# Transform data
X_train_new = vectorizer.fit_transform(train_df['cleaned_text'])
X_test_new = vectorizer.transform(test_df['cleaned_text'])
# Train Logistic Regression
lr_new = LogisticRegression(C=2.0, max_iter=1000, random_state=42,
)
lr_new.fit(X_train_new, y_train)
# Evaluate
y_pred_new = lr_new.predict(X_test_new)
acc = accuracy_score(y_test, y_pred_new)
f1 = f1_score(y_test, y_pred_new, average='macro')
tfidf_results.append({
'Configuration': config['name'],
'Accuracy': acc,
'F1-Score': f1,
'Features': X_train_new.shape[1]
})
print(f" Accuracy: {acc:.4f}, F1-Score: {f1:.4f}, Features: {X_train_new.shape[1]}")
# Display results
tfidf_results_df = pd.DataFrame(tfidf_results)
print("\n TF-IDF CONFIGURATION COMPARISON:")
print("=" * 80)
print(tfidf_results_df.to_string(index=False))
================================================================================
OPTIMIZED TF-IDF FEATURES
================================================================================
Testing different TF-IDF configurations...
Testing: Baseline
Accuracy: 0.9150, F1-Score: 0.9147, Features: 5000
Testing: More Features
Accuracy: 0.9140, F1-Score: 0.9136, Features: 10000
Testing: Trigrams
Accuracy: 0.9165, F1-Score: 0.9161, Features: 5000
Testing: Char N-grams
Accuracy: 0.8180, F1-Score: 0.8171, Features: 5000
Testing: Combined
Accuracy: 0.9160, F1-Score: 0.9157, Features: 10000
TF-IDF CONFIGURATION COMPARISON:
================================================================================
Configuration Accuracy F1-Score Features
Baseline 0.9150 0.914660 5000
More Features 0.9140 0.913648 10000
Trigrams 0.9165 0.916148 5000
Char N-grams 0.8180 0.817057 5000
Combined 0.9160 0.915655 10000
In [126]:
# Visualize
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
# Accuracy Plot
axes[0].bar(tfidf_results_df['Configuration'], tfidf_results_df['Accuracy'],
alpha=0.85, edgecolor='black', color='skyblue')
axes[0].set_ylabel('Accuracy', fontsize=12, fontweight='bold')
axes[0].set_title('TF-IDF Configurations - Accuracy', fontsize=14, fontweight='bold')
axes[0].tick_params(axis='x', rotation=30)
axes[0].grid(True, alpha=0.3, axis='y')
# Dynamically set ylim based on max value
acc_max = tfidf_results_df['Accuracy'].max()
axes[0].set_ylim([0.7, acc_max + 0.03])
for i, v in enumerate(tfidf_results_df['Accuracy']):
axes[0].text(i, v + 0.005, f'{v:.4f}', ha='center', va='bottom', fontsize=10)
# F1-Score Plot
axes[1].bar(tfidf_results_df['Configuration'], tfidf_results_df['F1-Score'],
alpha=0.85, edgecolor='black', color='lightgreen')
axes[1].set_ylabel('F1-Score (Macro)', fontsize=12, fontweight='bold')
axes[1].set_title('TF-IDF Configurations - F1-Score', fontsize=14, fontweight='bold')
axes[1].tick_params(axis='x', rotation=30)
axes[1].grid(True, alpha=0.3, axis='y')
f1_max = tfidf_results_df['F1-Score'].max()
axes[1].set_ylim([0.7, f1_max + 0.03])
for i, v in enumerate(tfidf_results_df['F1-Score']):
axes[1].text(i, v + 0.005, f'{v:.4f}', ha='center', va='bottom', fontsize=10)
# Final layout adjustments
plt.tight_layout(pad=2.0)
plt.savefig('tfidf_configurations_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
WEIGHTED CLASSES (FOR CONFUSED EMOTIONS)¶
In [127]:
print("\n" + "=" * 80)
print("CLASS WEIGHTING FOR CONFUSED EMOTIONS")
print("=" * 80)
print("""
CLASS WEIGHTING STRATEGY:
Problem: Fear and Sadness frequently confused
Solution: Assign higher penalty for misclassifying these classes
This forces the model to pay more attention to distinguishing these emotions.
""")
# Calculate class weights focusing on confused classes
from sklearn.utils.class_weight import compute_class_weight
# Standard balanced weights
classes = np.unique(y_train)
balanced_weights = compute_class_weight('balanced', classes=classes, y=y_train)
# Custom weights (emphasize Fear and Sadness)
custom_weights = balanced_weights.copy()
custom_weights[0] *= 1.5 # Sadness
custom_weights[4] *= 1.5 # Fear
class_weight_dict = {i: w for i, w in enumerate(custom_weights)}
print("\n CLASS WEIGHTS:")
print("-" * 80)
for i, (emotion, weight) in enumerate(zip(emotion_labels.values(), custom_weights)):
print(f" {emotion:.<20} {weight:.4f}")
# Train with class weights
lr_weighted = LogisticRegression(C=2.0, max_iter=1000, random_state=42,
class_weight=class_weight_dict)
lr_weighted.fit(X_train_tfidf, y_train)
y_pred_weighted = lr_weighted.predict(X_test_tfidf)
acc_weighted = accuracy_score(y_test, y_pred_weighted)
f1_weighted = f1_score(y_test, y_pred_weighted, average='macro')
print(f"\n RESULTS WITH CLASS WEIGHTING:")
print(f" Accuracy: {acc_weighted:.4f} ({acc_weighted*100:.2f}%)")
print(f" F1-Score: {f1_weighted:.4f}")
================================================================================ CLASS WEIGHTING FOR CONFUSED EMOTIONS ================================================================================ CLASS WEIGHTING STRATEGY: Problem: Fear and Sadness frequently confused Solution: Assign higher penalty for misclassifying these classes This forces the model to pay more attention to distinguishing these emotions. CLASS WEIGHTS: -------------------------------------------------------------------------------- Sadness............. 1.4997 Joy................. 1.0002 Love................ 0.9998 Anger............... 1.0002 Fear................ 1.4997 Surprise............ 1.0002 RESULTS WITH CLASS WEIGHTING: Accuracy: 0.9100 (91.00%) F1-Score: 0.9097
In [128]:
# Check confusion between Fear and Sadness
cm_weighted = confusion_matrix(y_test, y_pred_weighted)
fear_as_sadness_weighted = cm_weighted[4][0]
sadness_as_fear_weighted = cm_weighted[0][4]
cm_baseline = confusion_matrix(y_test, best_lr_pred)
fear_as_sadness_baseline = cm_baseline[4][0]
sadness_as_fear_baseline = cm_baseline[0][4]
print(f"\n FEAR-SADNESS CONFUSION COMPARISON:")
print("-" * 80)
print(f" Baseline:")
print(f" Fear → Sadness: {fear_as_sadness_baseline}")
print(f" Sadness → Fear: {sadness_as_fear_baseline}")
print(f" With Class Weighting:")
print(f" Fear → Sadness: {fear_as_sadness_weighted} ({fear_as_sadness_weighted - fear_as_sadness_baseline:+d})")
print(f" Sadness → Fear: {sadness_as_fear_weighted} ({sadness_as_fear_weighted - sadness_as_fear_baseline:+d})")
FEAR-SADNESS CONFUSION COMPARISON:
--------------------------------------------------------------------------------
Baseline:
Fear → Sadness: 5
Sadness → Fear: 9
With Class Weighting:
Fear → Sadness: 4 (-1)
Sadness → Fear: 9 (+0)
ENSEMBLE WITH DIFFERENT FEATURES¶
In [129]:
print("\n" + "=" * 80)
print("STRATEGY 4: ENSEMBLE WITH DIVERSE FEATURE SETS")
print("=" * 80)
print("""
DIVERSE ENSEMBLE STRATEGY:
Instead of ensembling models with same features:
1. Model 1: Word TF-IDF (1,2)-grams
2. Model 2: Character TF-IDF (3,5)-grams
3. Model 3: Negation-handled features
Each captures different linguistic patterns.
""")
# Already trained models with different features
# Model 1: Best LR (word features)
# Model 2: Character n-grams
vectorizer_char = TfidfVectorizer(analyzer='char', ngram_range=(3, 5),
max_features=5000)
X_train_char = vectorizer_char.fit_transform(train_df['cleaned_text'])
X_test_char = vectorizer_char.transform(test_df['cleaned_text'])
lr_char = LogisticRegression(C=1.0, max_iter=1000, random_state=42,
)
lr_char.fit(X_train_char, y_train)
# Model 3: Negation features (already trained as lr_neg)
# Get predictions from all three
pred_word = best_lr.predict(X_test_tfidf)
pred_char = lr_char.predict(X_test_char)
# Voting ensemble
from scipy import stats
pred_ensemble_diverse = []
for i in range(len(y_test)):
votes = [pred_word[i], pred_char[i]]
# Majority vote
majority = stats.mode(votes, keepdims=True)[0][0]
pred_ensemble_diverse.append(majority)
pred_ensemble_diverse = np.array(pred_ensemble_diverse)
acc_diverse = accuracy_score(y_test, pred_ensemble_diverse)
f1_diverse = f1_score(y_test, pred_ensemble_diverse, average='macro')
print(f"\n DIVERSE ENSEMBLE RESULTS:")
print(f" Accuracy: {acc_diverse:.4f} ({acc_diverse*100:.2f}%)")
print(f" F1-Score: {f1_diverse:.4f}")
print(f" Improvement over best single model: {(f1_diverse - best_lr_f1)*100:+.2f}%")
================================================================================ STRATEGY 4: ENSEMBLE WITH DIVERSE FEATURE SETS ================================================================================ DIVERSE ENSEMBLE STRATEGY: Instead of ensembling models with same features: 1. Model 1: Word TF-IDF (1,2)-grams 2. Model 2: Character TF-IDF (3,5)-grams 3. Model 3: Negation-handled features Each captures different linguistic patterns. DIVERSE ENSEMBLE RESULTS: Accuracy: 0.8525 (85.25%) F1-Score: 0.8517 Improvement over best single model: -6.78%
POST-PROCESSING RULES¶
In [130]:
print("\n" + "=" * 80)
print("STRATEGY 5: RULE-BASED POST-PROCESSING")
print("=" * 80)
print("""
POST-PROCESSING RULES:
Add domain knowledge to correct common confusions:
1. If text contains "afraid", "scared", "terrified" → Likely Fear
2. If text contains "miss", "lost", "alone" → Likely Sadness
3. If text contains "angry", "hate", "furious" → Likely Anger
Apply rules only when model confidence is low.
""")
# Define emotion-specific keywords
emotion_keywords = {
'Sadness': ['sad', 'depressed', 'alone', 'miss', 'lost', 'cry', 'tear',
'heartbroken', 'empty', 'miserable'],
'Joy': ['happy', 'excited', 'great', 'wonderful', 'amazing', 'awesome',
'fantastic', 'perfect', 'best', 'love'],
'Love': ['love', 'adore', 'sweetheart', 'darling', 'forever', 'together',
'romance', 'kiss', 'hug', 'heart'],
'Anger': ['angry', 'mad', 'hate', 'furious', 'annoyed', 'frustrated',
'pissed', 'rage', 'stupid', 'terrible'],
'Fear': ['afraid', 'scared', 'terrified', 'worried', 'anxious', 'panic',
'fear', 'nervous', 'frightened', 'stress'],
'Surprise': ['wow', 'omg', 'amazing', 'unbelievable', 'shocked', 'surprised',
'unexpected', 'cant believe', 'never thought', 'suddenly']
}
def apply_post_processing_rules(text, prediction, confidence, threshold=0.4):
"""Apply rules when model confidence is low"""
if confidence > threshold:
return prediction # High confidence, keep prediction
# Count keyword matches for each emotion
text_lower = text.lower()
scores = {}
for emotion, keywords in emotion_keywords.items():
score = sum(1 for keyword in keywords if keyword in text_lower)
if score > 0:
scores[emotion] = score
if not scores:
return prediction # No keyword matches, keep original
# Get emotion with most keyword matches
best_emotion = max(scores, key=scores.get)
# Convert emotion name to label
emotion_to_label = {v: k for k, v in emotion_labels.items()}
return emotion_to_label.get(best_emotion, prediction)
# Apply post-processing
print("\n Applying post-processing rules...")
# Get probabilities from best model
proba = best_lr.predict_proba(X_test_tfidf)
confidences = proba.max(axis=1)
pred_postprocessed = []
corrections = 0
for i in range(len(y_test)):
actual_idx = test_df.index[i]
text = test_df.loc[actual_idx, 'text']
original_pred = best_lr_pred[i]
confidence = confidences[i]
new_pred = apply_post_processing_rules(text, original_pred, confidence)
pred_postprocessed.append(new_pred)
if new_pred != original_pred:
corrections += 1
pred_postprocessed = np.array(pred_postprocessed)
acc_postproc = accuracy_score(y_test, pred_postprocessed)
f1_postproc = f1_score(y_test, pred_postprocessed, average='macro')
print(f"\n POST-PROCESSING RESULTS:")
print(f" Number of predictions corrected: {corrections}")
print(f" Accuracy: {acc_postproc:.4f} ({acc_postproc*100:.2f}%)")
print(f" F1-Score: {f1_postproc:.4f}")
print(f" Improvement: {(f1_postproc - best_lr_f1)*100:+.2f}%")
================================================================================ STRATEGY 5: RULE-BASED POST-PROCESSING ================================================================================ POST-PROCESSING RULES: Add domain knowledge to correct common confusions: 1. If text contains "afraid", "scared", "terrified" → Likely Fear 2. If text contains "miss", "lost", "alone" → Likely Sadness 3. If text contains "angry", "hate", "furious" → Likely Anger Apply rules only when model confidence is low. Applying post-processing rules... POST-PROCESSING RESULTS: Number of predictions corrected: 18 Accuracy: 0.9125 (91.25%) F1-Score: 0.9119 Improvement: -0.76%
In [131]:
all_models_comparison = [
# Baseline Models
{'Model': 'Naive Bayes (Baseline)',
'Accuracy': mnb_test_acc,
'F1_Macro': f1_score(y_test, mnb_test_pred, average='macro'),
'Precision': precision_score(y_test, mnb_test_pred, average='macro'),
'Recall': recall_score(y_test, mnb_test_pred, average='macro'),
'Category': 'Baseline'},
{'Model': 'Logistic Regression (Baseline)',
'Accuracy': lr_test_acc,
'F1_Macro': f1_score(y_test, lr_test_pred, average='macro'),
'Precision': precision_score(y_test, lr_test_pred, average='macro'),
'Recall': recall_score(y_test, lr_test_pred, average='macro'),
'Category': 'Baseline'},
{'Model': 'SVM (Baseline)',
'Accuracy': svm_test_acc,
'F1_Macro': f1_score(y_test, svm_test_pred, average='macro'),
'Precision': precision_score(y_test, svm_test_pred, average='macro'),
'Recall': recall_score(y_test, svm_test_pred, average='macro'),
'Category': 'Baseline'},
{'Model': 'Ensemble (Baseline)',
'Accuracy': ensemble_test_acc,
'F1_Macro': f1_score(y_test, ensemble_test_pred, average='macro'),
'Precision': precision_score(y_test, ensemble_test_pred, average='macro'),
'Recall': recall_score(y_test, ensemble_test_pred, average='macro'),
'Category': 'Baseline'},
# Tuned Models
{'Model': 'Naive Bayes (Tuned)',
'Accuracy': best_mnb_acc,
'F1_Macro': best_mnb_f1,
'Precision': precision_score(y_test, best_mnb_pred, average='macro'),
'Recall': recall_score(y_test, best_mnb_pred, average='macro'),
'Category': 'Hyperparameter Tuned'},
{'Model': 'Logistic Regression (Tuned)',
'Accuracy': best_lr_acc,
'F1_Macro': best_lr_f1,
'Precision': precision_score(y_test, best_lr_pred, average='macro'),
'Recall': recall_score(y_test, best_lr_pred, average='macro'),
'Category': 'Hyperparameter Tuned'},
{'Model': 'SVM (Tuned)',
'Accuracy': best_svm_acc,
'F1_Macro': best_svm_f1,
'Precision': precision_score(y_test, best_svm_pred, average='macro'),
'Recall': recall_score(y_test, best_svm_pred, average='macro'),
'Category': 'Hyperparameter Tuned'},
{'Model': 'LR + Class Weighting',
'Accuracy': acc_weighted,
'F1_Macro': f1_weighted,
'Precision': precision_score(y_test, y_pred_weighted, average='macro'),
'Recall': recall_score(y_test, y_pred_weighted, average='macro'),
'Category': 'Advanced Features'},
{'Model': 'Diverse Ensemble',
'Accuracy': acc_diverse,
'F1_Macro': f1_diverse,
'Precision': precision_score(y_test, pred_ensemble_diverse, average='macro'),
'Recall': recall_score(y_test, pred_ensemble_diverse, average='macro'),
'Category': 'Advanced Ensemble'},
{'Model': 'LR + Post-Processing',
'Accuracy': acc_postproc,
'F1_Macro': f1_postproc,
'Precision': precision_score(y_test, pred_postprocessed, average='macro'),
'Recall': recall_score(y_test, pred_postprocessed, average='macro'),
'Category': 'Advanced Features'},
]
In [132]:
comparison_df = pd.DataFrame(all_models_comparison)
comparison_df = comparison_df.sort_values('F1_Macro', ascending=False)
print("\n MODEL COMPARISON:")
print(comparison_df.to_string(index=False))
MODEL COMPARISON:
Model Accuracy F1_Macro Precision Recall Category
Logistic Regression (Tuned) 0.9200 0.919459 0.920761 0.920058 Hyperparameter Tuned
SVM (Tuned) 0.9190 0.918472 0.919813 0.919055 Hyperparameter Tuned
Ensemble (Baseline) 0.9170 0.916483 0.917725 0.917061 Baseline
Logistic Regression (Baseline) 0.9160 0.915494 0.916605 0.916058 Baseline
SVM (Baseline) 0.9130 0.912618 0.913447 0.913051 Baseline
LR + Post-Processing 0.9125 0.911865 0.913068 0.912559 Advanced Features
LR + Class Weighting 0.9100 0.909748 0.911029 0.910045 Advanced Features
Naive Bayes (Tuned) 0.8905 0.889698 0.893222 0.890567 Hyperparameter Tuned
Naive Bayes (Baseline) 0.8835 0.882856 0.886169 0.883566 Baseline
Diverse Ensemble 0.8525 0.851700 0.858363 0.852582 Advanced Ensemble
In [133]:
!pip install transformers torch accelerate datasets tqdm
Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.57.3) Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (2.9.0+cu126) Requirement already satisfied: accelerate in /usr/local/lib/python3.12/dist-packages (1.12.0) Requirement already satisfied: datasets in /usr/local/lib/python3.12/dist-packages (4.0.0) Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (4.67.1) Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.20.2) Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.36.0) Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2.0.2) Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (25.0) Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.3) Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2025.11.3) Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4) Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.2) Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.7.0) Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch) (4.15.0) Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch) (75.2.0) Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch) (1.14.0) Requirement already satisfied: networkx>=2.5.1 in /usr/local/lib/python3.12/dist-packages (from torch) (3.6.1) Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch) (3.1.6) Requirement already satisfied: fsspec>=0.8.5 in /usr/local/lib/python3.12/dist-packages (from torch) (2025.3.0) Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77) Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77) Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.80) Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch) (9.10.2.21) Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.4.1) Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch) (11.3.0.4) Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch) (10.3.7.77) Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch) (11.7.1.2) Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch) (12.5.4.2) Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch) (0.7.1) Requirement already satisfied: nvidia-nccl-cu12==2.27.5 in /usr/local/lib/python3.12/dist-packages (from torch) (2.27.5) Requirement already satisfied: nvidia-nvshmem-cu12==3.3.20 in /usr/local/lib/python3.12/dist-packages (from torch) (3.3.20) Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77) Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.85) Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch) (1.11.1.6) Requirement already satisfied: triton==3.5.0 in /usr/local/lib/python3.12/dist-packages (from torch) (3.5.0) Requirement already satisfied: psutil in /usr/local/lib/python3.12/dist-packages (from accelerate) (5.9.5) Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (18.1.0) Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.3.8) Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets) (2.2.2) Requirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets) (3.6.0) Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.70.16) Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (3.13.3) Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.2.0) Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.4) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.11) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2026.1.4) Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch) (1.3.0) Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch) (3.0.3) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2) Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.3) Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2.6.1) Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.4.0) Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (25.4.0) Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.8.0) Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (6.7.0) Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (0.4.1) Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.22.0) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)
In [134]:
!pip install transformers torch scikit-learn --quiet
In [135]:
# Deep Learning
!pip install tf-keras
from transformers import (
DistilBertTokenizer, DistilBertForSequenceClassification,
Trainer, TrainingArguments
)
import torch
from torch.utils.data import Dataset
Requirement already satisfied: tf-keras in /usr/local/lib/python3.12/dist-packages (2.19.0) Requirement already satisfied: tensorflow<2.20,>=2.19 in /usr/local/lib/python3.12/dist-packages (from tf-keras) (2.19.0) Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (1.4.0) Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (1.6.3) Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (25.12.19) Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (0.7.0) Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (0.2.0) Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (18.1.1) Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (3.4.0) Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (25.0) Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (5.29.5) Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (2.32.4) Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (75.2.0) Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (1.17.0) Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (3.3.0) Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (4.15.0) Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (2.0.1) Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (1.76.0) Requirement already satisfied: tensorboard~=2.19.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (2.19.0) Requirement already satisfied: keras>=3.5.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (3.10.0) Requirement already satisfied: numpy<2.2.0,>=1.26.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (2.0.2) Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (3.15.1) Requirement already satisfied: ml-dtypes<1.0.0,>=0.5.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19->tf-keras) (0.5.4) Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow<2.20,>=2.19->tf-keras) (0.45.1) Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras>=3.5.0->tensorflow<2.20,>=2.19->tf-keras) (13.9.4) Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.5.0->tensorflow<2.20,>=2.19->tf-keras) (0.1.0) Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.5.0->tensorflow<2.20,>=2.19->tf-keras) (0.18.0) Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow<2.20,>=2.19->tf-keras) (3.4.4) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow<2.20,>=2.19->tf-keras) (3.11) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow<2.20,>=2.19->tf-keras) (2.5.0) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow<2.20,>=2.19->tf-keras) (2026.1.4) Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19->tf-keras) (3.10) Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19->tf-keras) (0.7.2) Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19->tf-keras) (3.1.5) Requirement already satisfied: markupsafe>=2.1.1 in /usr/local/lib/python3.12/dist-packages (from werkzeug>=1.0.1->tensorboard~=2.19.0->tensorflow<2.20,>=2.19->tf-keras) (3.0.3) Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras>=3.5.0->tensorflow<2.20,>=2.19->tf-keras) (4.0.0) Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras>=3.5.0->tensorflow<2.20,>=2.19->tf-keras) (2.19.2) Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras>=3.5.0->tensorflow<2.20,>=2.19->tf-keras) (0.1.2)
Prepare Data for DistilBERT¶
In [136]:
print("\n" + "=" * 80)
print("DEEP LEARNING MODEL - DISTILBERT")
print("=" * 80)
# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\n Using device: {device}")
if torch.cuda.is_available():
print(f" GPU: {torch.cuda.get_device_name(0)}")
print(f" Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
print(" CPU used.")
# Initialize tokenizer
print("\n Loading DistilBERT tokenizer...")
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
print(" Tokenizer loaded")
================================================================================ DEEP LEARNING MODEL - DISTILBERT ================================================================================ Using device: cuda GPU: Tesla T4 Memory: 15.83 GB Loading DistilBERT tokenizer...
tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]
vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]
tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]
config.json: 0%| | 0.00/483 [00:00<?, ?B/s]
Tokenizer loaded
In [137]:
# Tokenize the datasets (using original text, not preprocessed)
# DistilBERT handles its own preprocessing
def tokenize_data(texts, max_length=128):
"""Tokenize texts using DistilBERT tokenizer"""
return tokenizer(
texts.tolist(),
padding='max_length',
truncation=True,
max_length=max_length,
return_tensors='pt'
)
print("\n Tokenizing datasets...")
print(" This may take a few minutes...")
# Use original text (not preprocessed) for BERT
train_encodings = tokenize_data(train_df['text'])
val_encodings = tokenize_data(val_df['text'])
test_encodings = tokenize_data(test_df['text'])
print(" Tokenization completed")
Tokenizing datasets... This may take a few minutes... Tokenization completed
CREATE PYTORCH DATASET¶
In [138]:
class EmotionDataset(Dataset):
"""Custom Dataset for Emotion Classification"""
def __init__(self, encodings, labels):
self.encodings = encodings
self.labels = labels
def __getitem__(self, idx):
item = {key: val[idx] for key, val in self.encodings.items()}
item['labels'] = torch.tensor(self.labels[idx])
return item
def __len__(self):
return len(self.labels)
# Create datasets
train_dataset = EmotionDataset(train_encodings, y_train)
val_dataset = EmotionDataset(val_encodings, y_val)
test_dataset = EmotionDataset(test_encodings, y_test)
print(f"\n DATASET SIZES:")
print(f" Training: {len(train_dataset)} samples")
print(f" Validation: {len(val_dataset)} samples")
print(f" Test: {len(test_dataset)} samples")
DATASET SIZES: Training: 15999 samples Validation: 2000 samples Test: 2000 samples
LOAD AND FINE-TUNE DISTILBERT MODEL¶
In [139]:
print("\n" + "=" * 80)
print("FINE-TUNING DISTILBERT MODEL")
print("=" * 80)
print("\n Loading pre-trained DistilBERT model...")
model = DistilBertForSequenceClassification.from_pretrained(
'distilbert-base-uncased',
num_labels=6 # 6 emotion classes
)
model.to(device)
print(" Model loaded and moved to device")
# Define training arguments
training_args = TrainingArguments(
output_dir='./results',
num_train_epochs=3,
per_device_train_batch_size=16,
per_device_eval_batch_size=32,
warmup_steps=500,
weight_decay=0.01,
logging_dir='./logs',
logging_steps=100,
eval_strategy="epoch",
save_strategy="epoch",
load_best_model_at_end=True,
metric_for_best_model="accuracy",
report_to="none" # Disable wandb
)
# Define compute metrics function
def compute_metrics(eval_pred):
"""Compute metrics for evaluation"""
predictions, labels = eval_pred
predictions = np.argmax(predictions, axis=1)
accuracy = accuracy_score(labels, predictions)
precision = precision_score(labels, predictions, average='macro')
recall = recall_score(labels, predictions, average='macro')
f1 = f1_score(labels, predictions, average='macro')
return {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1': f1
}
# Initialize Trainer
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=val_dataset,
compute_metrics=compute_metrics
)
print("\n Starting training...")
print(" Progress will be shown below...")
print("-" * 80)
# Train the model
train_result = trainer.train()
print("\n Training completed!")
print(f"\n TRAINING METRICS:")
print(f" Training Loss: {train_result.training_loss:.4f}")
print(f" Training Time: {train_result.metrics['train_runtime']:.2f} seconds")
================================================================================ FINE-TUNING DISTILBERT MODEL ================================================================================ Loading pre-trained DistilBERT model...
model.safetensors: 0%| | 0.00/268M [00:00<?, ?B/s]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Model loaded and moved to device Starting training... Progress will be shown below... --------------------------------------------------------------------------------
[3000/3000 09:39, Epoch 3/3]
| Epoch | Training Loss | Validation Loss | Accuracy | Precision | Recall | F1 |
|---|---|---|---|---|---|---|
| 1 | 0.219200 | 0.241112 | 0.936500 | 0.937943 | 0.936466 | 0.936214 |
| 2 | 0.151400 | 0.168649 | 0.941500 | 0.943389 | 0.941474 | 0.941270 |
| 3 | 0.097600 | 0.186176 | 0.945500 | 0.946993 | 0.945474 | 0.945209 |
Training completed! TRAINING METRICS: Training Loss: 0.2917 Training Time: 581.11 seconds
EVALUATE DISTILBERT MODEL
In [140]:
print("\n" + "=" * 80)
print("EVALUATING DISTILBERT MODEL")
print("=" * 80)
# Evaluate on validation set
print("\n Evaluating on validation set...")
val_results = trainer.evaluate()
print(f"\n VALIDATION METRICS:")
print(f" Accuracy: {val_results['eval_accuracy']:.4f} ({val_results['eval_accuracy']*100:.2f}%)")
print(f" Precision: {val_results['eval_precision']:.4f}")
print(f" Recall: {val_results['eval_recall']:.4f}")
print(f" F1-Score: {val_results['eval_f1']:.4f}")
# Evaluate on test set
print("\n Evaluating on test set...")
test_results = trainer.evaluate(test_dataset)
print(f"\n TEST METRICS:")
print(f" Accuracy: {test_results['eval_accuracy']:.4f} ({test_results['eval_accuracy']*100:.2f}%)")
print(f" Precision: {test_results['eval_precision']:.4f}")
print(f" Recall: {test_results['eval_recall']:.4f}")
print(f" F1-Score: {test_results['eval_f1']:.4f}")
# Get predictions
print("\n Generating predictions...")
test_predictions = trainer.predict(test_dataset)
bert_test_pred = np.argmax(test_predictions.predictions, axis=1)
# Save test accuracy for comparison
bert_test_acc = test_results['eval_accuracy']
================================================================================ EVALUATING DISTILBERT MODEL ================================================================================ Evaluating on validation set...
VALIDATION METRICS: Accuracy: 0.9455 (94.55%) Precision: 0.9470 Recall: 0.9455 F1-Score: 0.9452 Evaluating on test set... TEST METRICS: Accuracy: 0.9430 (94.30%) Precision: 0.9442 Recall: 0.9430 F1-Score: 0.9429 Generating predictions...
DISTILBERT CLASSIFICATION REPORT
In [141]:
print("\n" + "=" * 80)
print("DISTILBERT - DETAILED CLASSIFICATION REPORT")
print("=" * 80)
print(f"\nCLASSIFICATION REPORT (Test Set):")
print("-" * 80)
bert_report = classification_report(y_test, bert_test_pred,
target_names=list(emotion_labels.values()),
digits=4)
print(bert_report)
================================================================================
DISTILBERT - DETAILED CLASSIFICATION REPORT
================================================================================
CLASSIFICATION REPORT (Test Set):
--------------------------------------------------------------------------------
precision recall f1-score support
Sadness 0.9632 0.9429 0.9530 333
Joy 0.9807 0.9132 0.9457 334
Love 0.9320 0.9880 0.9592 333
Anger 0.9626 0.9279 0.9450 333
Fear 0.9200 0.8952 0.9074 334
Surprise 0.9066 0.9910 0.9469 333
accuracy 0.9430 2000
macro avg 0.9442 0.9430 0.9429 2000
weighted avg 0.9442 0.9430 0.9428 2000
In [142]:
# Confusion Matrix
bert_cm = confusion_matrix(y_test, bert_test_pred)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(bert_cm, annot=True, fmt='d', cmap='RdPu',
xticklabels=list(emotion_labels.values()),
yticklabels=list(emotion_labels.values()),
cbar_kws={'label': 'Count'})
ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
ax.set_title('DistilBERT - Confusion Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('distilbert_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()
COMPARE TRADITIONAL ML WITH DEEP LEARNING
In [143]:
print("\n" + "=" * 80)
print("COMPARISON: TRADITIONAL ML vs DEEP LEARNING")
print("=" * 80)
# Create comprehensive comparison
final_comparison = {
'Model': ['Naive Bayes', 'Logistic Regression', 'SVM', 'Ensemble', 'DistilBERT'],
'Test Accuracy': [
mnb_test_acc,
lr_test_acc,
svm_test_acc,
ensemble_test_acc,
bert_test_acc
],
'Macro F1-Score': [
f1_score(y_test, mnb_test_pred, average='macro'),
f1_score(y_test, lr_test_pred, average='macro'),
f1_score(y_test, svm_test_pred, average='macro'),
f1_score(y_test, ensemble_test_pred, average='macro'),
f1_score(y_test, bert_test_pred, average='macro')
],
'Macro Precision': [
precision_score(y_test, mnb_test_pred, average='macro'),
precision_score(y_test, lr_test_pred, average='macro'),
precision_score(y_test, svm_test_pred, average='macro'),
precision_score(y_test, ensemble_test_pred, average='macro'),
precision_score(y_test, bert_test_pred, average='macro')
],
'Macro Recall': [
recall_score(y_test, mnb_test_pred, average='macro'),
recall_score(y_test, lr_test_pred, average='macro'),
recall_score(y_test, svm_test_pred, average='macro'),
recall_score(y_test, ensemble_test_pred, average='macro'),
recall_score(y_test, bert_test_pred, average='macro')
],
'Type': ['Traditional', 'Traditional', 'Traditional', 'Ensemble', 'Deep Learning']
}
final_comparison_df = pd.DataFrame(final_comparison)
print("\n FINAL MODEL COMPARISON:")
print("-" * 80)
print(final_comparison_df.to_string(index=False))
================================================================================
COMPARISON: TRADITIONAL ML vs DEEP LEARNING
================================================================================
FINAL MODEL COMPARISON:
--------------------------------------------------------------------------------
Model Test Accuracy Macro F1-Score Macro Precision Macro Recall Type
Naive Bayes 0.8835 0.882856 0.886169 0.883566 Traditional
Logistic Regression 0.9160 0.915494 0.916605 0.916058 Traditional
SVM 0.9130 0.912618 0.913447 0.913051 Traditional
Ensemble 0.9170 0.916483 0.917725 0.917061 Ensemble
DistilBERT 0.9430 0.942864 0.944187 0.943039 Deep Learning
In [144]:
# Visualization: Final Comparison
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Plot 1: Test Accuracy
ax1 = axes[0, 0]
colors = ['skyblue', 'lightgreen', 'salmon', 'plum', 'gold']
bars = ax1.bar(final_comparison_df['Model'],
final_comparison_df['Test Accuracy'],
color=colors, alpha=0.8)
ax1.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
ax1.set_title('Test Accuracy Comparison', fontsize=14, fontweight='bold')
ax1.set_ylim([0, 1.0])
ax1.grid(True, alpha=0.3, axis='y')
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45, ha='right')
for bar in bars:
height = bar.get_height()
ax1.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontsize=10, fontweight='bold')
# Plot 2: F1-Score
ax2 = axes[0, 1]
bars = ax2.bar(final_comparison_df['Model'],
final_comparison_df['Macro F1-Score'],
color=colors, alpha=0.8)
ax2.set_ylabel('F1-Score', fontsize=12, fontweight='bold')
ax2.set_title('Macro F1-Score Comparison', fontsize=14, fontweight='bold')
ax2.set_ylim([0, 1.0])
ax2.grid(True, alpha=0.3, axis='y')
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45, ha='right')
for bar in bars:
height = bar.get_height()
ax2.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontsize=10, fontweight='bold')
# Plot 3: Precision
ax3 = axes[1, 0]
bars = ax3.bar(final_comparison_df['Model'],
final_comparison_df['Macro Precision'],
color=colors, alpha=0.8)
ax3.set_ylabel('Precision', fontsize=12, fontweight='bold')
ax3.set_title('Macro Precision Comparison', fontsize=14, fontweight='bold')
ax3.set_ylim([0, 1.0])
ax3.grid(True, alpha=0.3, axis='y')
plt.setp(ax3.xaxis.get_majorticklabels(), rotation=45, ha='right')
for bar in bars:
height = bar.get_height()
ax3.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontsize=10, fontweight='bold')
# Plot 4: Recall
ax4 = axes[1, 1]
bars = ax4.bar(final_comparison_df['Model'],
final_comparison_df['Macro Recall'],
color=colors, alpha=0.8)
ax4.set_ylabel('Recall', fontsize=12, fontweight='bold')
ax4.set_title('Macro Recall Comparison', fontsize=14, fontweight='bold')
ax4.set_ylim([0, 1.0])
ax4.grid(True, alpha=0.3, axis='y')
plt.setp(ax4.xaxis.get_majorticklabels(), rotation=45, ha='right')
for bar in bars:
height = bar.get_height()
ax4.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontsize=10, fontweight='bold')
plt.suptitle('Traditional ML vs Deep Learning - Complete Comparison',
fontsize=16, fontweight='bold', y=1.00)
plt.tight_layout()
plt.savefig('final_model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
PER-CLASS PERFORMANCE ANALYSIS
In [145]:
# Get per-class metrics for all models
models_predictions = {
'Naive Bayes': mnb_test_pred,
'Logistic Regression': lr_test_pred,
'SVM': svm_test_pred,
'Ensemble': ensemble_test_pred,
'DistilBERT': bert_test_pred
}
# Create detailed per-class comparison
emotion_names = list(emotion_labels.values())
per_class_metrics = []
for emotion_idx, emotion_name in enumerate(emotion_names):
for model_name, predictions in models_predictions.items():
prec, rec, f1, _ = precision_recall_fscore_support(
y_test, predictions, average=None, zero_division=0
)
per_class_metrics.append({
'Emotion': emotion_name,
'Model': model_name,
'Precision': prec[emotion_idx],
'Recall': rec[emotion_idx],
'F1-Score': f1[emotion_idx]
})
per_class_df = pd.DataFrame(per_class_metrics)
# Display results
print("\n PER-CLASS PERFORMANCE METRICS:")
print("-" * 80)
for emotion in emotion_names:
print(f"\n{emotion}:")
emotion_data = per_class_df[per_class_df['Emotion'] == emotion]
print(emotion_data[['Model', 'Precision', 'Recall', 'F1-Score']].to_string(index=False))
PER-CLASS PERFORMANCE METRICS:
--------------------------------------------------------------------------------
Sadness:
Model Precision Recall F1-Score
Naive Bayes 0.935897 0.876877 0.905426
Logistic Regression 0.936170 0.924925 0.930514
SVM 0.939024 0.924925 0.931921
Ensemble 0.948012 0.930931 0.939394
DistilBERT 0.963190 0.942943 0.952959
Joy:
Model Precision Recall F1-Score
Naive Bayes 0.909722 0.784431 0.842444
Logistic Regression 0.931373 0.853293 0.890625
SVM 0.922330 0.853293 0.886470
Ensemble 0.931148 0.850299 0.888889
DistilBERT 0.980707 0.913174 0.945736
Love:
Model Precision Recall F1-Score
Naive Bayes 0.871935 0.960961 0.914286
Logistic Regression 0.913408 0.981982 0.946454
SVM 0.917379 0.966967 0.941520
Ensemble 0.913408 0.981982 0.946454
DistilBERT 0.932011 0.987988 0.959184
Anger:
Model Precision Recall F1-Score
Naive Bayes 0.906832 0.876877 0.891603
Logistic Regression 0.940439 0.900901 0.920245
SVM 0.928349 0.894895 0.911315
Ensemble 0.934783 0.903904 0.919084
DistilBERT 0.962617 0.927928 0.944954
Fear:
Model Precision Recall F1-Score
Naive Bayes 0.860606 0.850299 0.855422
Logistic Regression 0.880734 0.862275 0.871407
SVM 0.879154 0.871257 0.875188
Ensemble 0.888889 0.862275 0.875380
DistilBERT 0.920000 0.895210 0.907436
Surprise:
Model Precision Recall F1-Score
Naive Bayes 0.832021 0.951952 0.887955
Logistic Regression 0.897507 0.972973 0.933718
SVM 0.894444 0.966967 0.929293
Ensemble 0.890110 0.972973 0.929699
DistilBERT 0.906593 0.990991 0.946915
In [146]:
# Pivot the DataFrame
pivot_df = per_class_df.pivot(index='Emotion', columns='Model', values='F1-Score')
# Set consistent color palette for models
model_colors = {
'DistilBERT': '#FF69B4', # pink
'Ensemble': '#A0522D', # brown
'Logistic Regression': '#228B22', # green
'Naive Bayes': '#20B2AA', # teal
'SVM': '#1E90FF' # blue
}
# Reorder columns if needed
pivot_df = pivot_df[sorted(model_colors.keys())]
# Plot
fig, ax = plt.subplots(figsize=(14, 8))
pivot_df.plot(kind='bar', ax=ax, width=0.75, alpha=0.9, color=[model_colors[col] for col in pivot_df.columns])
# Axis labels and title
ax.set_xlabel('Emotion', fontsize=12, fontweight='bold')
ax.set_ylabel('F1-Score', fontsize=12, fontweight='bold')
ax.set_title('Per-Emotion F1-Score Comparison Across All Models', fontsize=14, fontweight='bold')
# Legend
ax.legend(title='Model', fontsize=10, title_fontsize=11, loc='upper right')
# Grid and limits
ax.grid(True, alpha=0.3, axis='y')
ax.set_ylim([0.4, 1.10])
# X-tick formatting
plt.xticks(rotation=30, ha='center')
# Layout and save
plt.tight_layout(pad=2.0)
plt.savefig('per_emotion_f1_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
ERROR ANALYSIS - MISCLASSIFICATION PATTERNS
In [147]:
print("\n" + "=" * 80)
print("ERROR ANALYSIS - MISCLASSIFICATION PATTERNS")
print("=" * 80)
# Use DistilBERT for error analysis
misclassified_indices = np.where(y_test != bert_test_pred)[0]
print(f"\n Total misclassifications: {len(misclassified_indices)} out of {len(y_test)}")
print(f" Error rate: {len(misclassified_indices)/len(y_test)*100:.2f}%")
# Analyze confusion patterns
from collections import defaultdict
confusion_pairs = defaultdict(int)
for idx in misclassified_indices:
true_label = emotion_labels[y_test[idx]]
pred_label = emotion_labels[bert_test_pred[idx]]
confusion_pairs[(true_label, pred_label)] += 1
# Get top confusion pairs
top_confusions = sorted(confusion_pairs.items(), key=lambda x: x[1], reverse=True)[:10]
print("\n TOP 10 CONFUSION PATTERNS:")
print("-" * 80)
for (true_emotion, pred_emotion), count in top_confusions:
print(f" {true_emotion:.<20} → {pred_emotion:.<20} {count:>3} times")
# Sample misclassified examples
print("\n SAMPLE MISCLASSIFIED EXAMPLES:")
print("-" * 80)
sample_errors = np.random.choice(misclassified_indices,
min(5, len(misclassified_indices)),
replace=False)
for i, idx in enumerate(sample_errors, 1):
actual_idx = test_df.index[idx]
text = test_df.loc[actual_idx, 'text']
true_emotion = emotion_labels[y_test[idx]]
pred_emotion = emotion_labels[bert_test_pred[idx]]
print(f"\nExample {i}:")
print(f" Text: {text}")
print(f" True: {true_emotion:.<15} Predicted: {pred_emotion}")
================================================================================ ERROR ANALYSIS - MISCLASSIFICATION PATTERNS ================================================================================ Total misclassifications: 114 out of 2000 Error rate: 5.70% TOP 10 CONFUSION PATTERNS: -------------------------------------------------------------------------------- Fear................ → Surprise............ 29 times Joy................. → Love................ 22 times Anger............... → Fear................ 14 times Sadness............. → Fear................ 9 times Anger............... → Sadness............. 8 times Sadness............. → Anger............... 7 times Fear................ → Anger............... 5 times Joy................. → Surprise............ 5 times Love................ → Joy................. 3 times Surprise............ → Fear................ 3 times SAMPLE MISCLASSIFIED EXAMPLES: -------------------------------------------------------------------------------- Example 1: Text: i remembered that feeling and hated it True: Sadness........ Predicted: Anger Example 2: Text: i feel agitated all of a sudden True: Anger.......... Predicted: Fear Example 3: Text: when my parents did not let me go on a study tour True: Sadness........ Predicted: Anger Example 4: Text: i always feel a bit crap after sweet drinks but this leaves me feeling great no matter how many ive had True: Joy............ Predicted: Love Example 5: Text: i feel much more accepted in the us for my faith than i do for my skin color and if that changes so be it True: Joy............ Predicted: Love
PRACTICAL DEMONSTRATION - EMOTION PREDICTOR
In [148]:
custom_texts = [
"I am absolutely thrilled and ecstatic about my promotion! This is the best day of my life!",
"I feel so devastated and heartbroken. I can't stop crying since she left me.",
"You are the love of my life. I cherish every moment we spend together.",
"I am furious! How dare you disrespect me like that! This is completely unacceptable!",
"I'm terrified about the surgery tomorrow. What if something goes wrong?",
"Oh my God! I can't believe I just won the lottery! This is unbelievable!",
"I'm feeling a bit down today, nothing seems to go right.",
"I'm really worried about the exam results. I hope I passed.",
"I absolutely adore spending time with you. You make me so happy!",
"This situation makes me so anxious and nervous. I don't know what to do."
]
print("\n PREDICTING EMOTIONS FOR CUSTOM TEXTS:")
custom_predictions = []
for i, text in enumerate(custom_texts, 1):
# Tokenize and predict
with torch.no_grad():
inputs = tokenizer(text, return_tensors='pt', padding='max_length',
truncation=True, max_length=128)
inputs = {k: v.to(device) for k, v in inputs.items()}
outputs = trainer.model(**inputs)
probs = torch.nn.functional.softmax(outputs.logits, dim=1)[0].cpu().numpy()
prediction = torch.argmax(torch.tensor(probs)).item()
predicted_emotion = emotion_labels[prediction]
confidence = probs[prediction]
# Get top 3 predictions
top_3_idx = np.argsort(probs)[-3:][::-1]
print(f"\n{'='*80}")
print(f"CUSTOM PREDICTION {i}:")
print(f"{'='*80}")
print(f"Text: {text}")
print(f"\n Predicted Emotion: {predicted_emotion}")
print(f" Confidence: {confidence:.2%}")
print(f"\n Top 3 Predictions:")
print("-" * 80)
for rank, idx in enumerate(top_3_idx, 1):
emotion = emotion_labels[idx]
prob = probs[idx]
print(f" {rank}. {emotion:.<15} ({prob*100:>6.2f}%) ")
print(f"\n All Emotion Probabilities:")
print("-" * 80)
for label, prob in enumerate(probs):
emotion = emotion_labels[label]
print(f" {emotion:.<15} ({prob*100:>6.2f}%) ")
custom_predictions.append({
'Text': text[:50] + '...' if len(text) > 50 else text,
'Predicted': predicted_emotion,
'Confidence': f'{confidence:.2%}',
'2nd Choice': emotion_labels[top_3_idx[1]],
'3rd Choice': emotion_labels[top_3_idx[2]]
})
# Summary table
custom_df = pd.DataFrame(custom_predictions)
print("\n" + "=" * 80)
print("PREDICTIONS SUMMARY:")
print("=" * 80)
print(custom_df.to_string(index=False))
PREDICTING EMOTIONS FOR CUSTOM TEXTS:
================================================================================
CUSTOM PREDICTION 1:
================================================================================
Text: I am absolutely thrilled and ecstatic about my promotion! This is the best day of my life!
Predicted Emotion: Joy
Confidence: 99.96%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Joy............ ( 99.96%)
2. Sadness........ ( 0.01%)
3. Surprise....... ( 0.01%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 0.01%)
Joy............ ( 99.96%)
Love........... ( 0.01%)
Anger.......... ( 0.01%)
Fear........... ( 0.01%)
Surprise....... ( 0.01%)
================================================================================
CUSTOM PREDICTION 2:
================================================================================
Text: I feel so devastated and heartbroken. I can't stop crying since she left me.
Predicted Emotion: Sadness
Confidence: 99.95%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Sadness........ ( 99.95%)
2. Fear........... ( 0.02%)
3. Anger.......... ( 0.01%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 99.95%)
Joy............ ( 0.01%)
Love........... ( 0.01%)
Anger.......... ( 0.01%)
Fear........... ( 0.02%)
Surprise....... ( 0.00%)
================================================================================
CUSTOM PREDICTION 3:
================================================================================
Text: You are the love of my life. I cherish every moment we spend together.
Predicted Emotion: Love
Confidence: 84.48%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Love........... ( 84.48%)
2. Joy............ ( 15.38%)
3. Sadness........ ( 0.06%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 0.06%)
Joy............ ( 15.38%)
Love........... ( 84.48%)
Anger.......... ( 0.04%)
Fear........... ( 0.02%)
Surprise....... ( 0.02%)
================================================================================
CUSTOM PREDICTION 4:
================================================================================
Text: I am furious! How dare you disrespect me like that! This is completely unacceptable!
Predicted Emotion: Anger
Confidence: 99.92%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Anger.......... ( 99.92%)
2. Sadness........ ( 0.04%)
3. Fear........... ( 0.02%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 0.04%)
Joy............ ( 0.01%)
Love........... ( 0.00%)
Anger.......... ( 99.92%)
Fear........... ( 0.02%)
Surprise....... ( 0.00%)
================================================================================
CUSTOM PREDICTION 5:
================================================================================
Text: I'm terrified about the surgery tomorrow. What if something goes wrong?
Predicted Emotion: Fear
Confidence: 99.92%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Fear........... ( 99.92%)
2. Sadness........ ( 0.04%)
3. Surprise....... ( 0.02%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 0.04%)
Joy............ ( 0.01%)
Love........... ( 0.00%)
Anger.......... ( 0.01%)
Fear........... ( 99.92%)
Surprise....... ( 0.02%)
================================================================================
CUSTOM PREDICTION 6:
================================================================================
Text: Oh my God! I can't believe I just won the lottery! This is unbelievable!
Predicted Emotion: Surprise
Confidence: 97.51%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Surprise....... ( 97.51%)
2. Joy............ ( 1.38%)
3. Fear........... ( 0.92%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 0.11%)
Joy............ ( 1.38%)
Love........... ( 0.02%)
Anger.......... ( 0.06%)
Fear........... ( 0.92%)
Surprise....... ( 97.51%)
================================================================================
CUSTOM PREDICTION 7:
================================================================================
Text: I'm feeling a bit down today, nothing seems to go right.
Predicted Emotion: Sadness
Confidence: 99.65%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Sadness........ ( 99.65%)
2. Fear........... ( 0.23%)
3. Anger.......... ( 0.08%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 99.65%)
Joy............ ( 0.04%)
Love........... ( 0.01%)
Anger.......... ( 0.08%)
Fear........... ( 0.23%)
Surprise....... ( 0.00%)
================================================================================
CUSTOM PREDICTION 8:
================================================================================
Text: I'm really worried about the exam results. I hope I passed.
Predicted Emotion: Fear
Confidence: 92.30%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Fear........... ( 92.30%)
2. Sadness........ ( 4.00%)
3. Anger.......... ( 3.31%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 4.00%)
Joy............ ( 0.32%)
Love........... ( 0.03%)
Anger.......... ( 3.31%)
Fear........... ( 92.30%)
Surprise....... ( 0.04%)
================================================================================
CUSTOM PREDICTION 9:
================================================================================
Text: I absolutely adore spending time with you. You make me so happy!
Predicted Emotion: Joy
Confidence: 99.46%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Joy............ ( 99.46%)
2. Sadness........ ( 0.28%)
3. Anger.......... ( 0.19%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 0.28%)
Joy............ ( 99.46%)
Love........... ( 0.04%)
Anger.......... ( 0.19%)
Fear........... ( 0.02%)
Surprise....... ( 0.01%)
================================================================================
CUSTOM PREDICTION 10:
================================================================================
Text: This situation makes me so anxious and nervous. I don't know what to do.
Predicted Emotion: Fear
Confidence: 99.86%
Top 3 Predictions:
--------------------------------------------------------------------------------
1. Fear........... ( 99.86%)
2. Sadness........ ( 0.09%)
3. Anger.......... ( 0.04%)
All Emotion Probabilities:
--------------------------------------------------------------------------------
Sadness........ ( 0.09%)
Joy............ ( 0.01%)
Love........... ( 0.00%)
Anger.......... ( 0.04%)
Fear........... ( 99.86%)
Surprise....... ( 0.01%)
================================================================================
PREDICTIONS SUMMARY:
================================================================================
Text Predicted Confidence 2nd Choice 3rd Choice
I am absolutely thrilled and ecstatic about my pro... Joy 99.96% Sadness Surprise
I feel so devastated and heartbroken. I can't stop... Sadness 99.95% Fear Anger
You are the love of my life. I cherish every momen... Love 84.48% Joy Sadness
I am furious! How dare you disrespect me like that... Anger 99.92% Sadness Fear
I'm terrified about the surgery tomorrow. What if ... Fear 99.92% Sadness Surprise
Oh my God! I can't believe I just won the lottery!... Surprise 97.51% Joy Fear
I'm feeling a bit down today, nothing seems to go ... Sadness 99.65% Fear Anger
I'm really worried about the exam results. I hope ... Fear 92.30% Sadness Anger
I absolutely adore spending time with you. You mak... Joy 99.46% Sadness Anger
This situation makes me so anxious and nervous. I ... Fear 99.86% Sadness Anger
In [149]:
from wordcloud import WordCloud
# Collect text for each emotion
emotion_texts = {}
for emotion_label, emotion_name in emotion_labels.items():
# Get all texts for this emotion
emotion_data = train_df[train_df['label'] == emotion_label]['cleaned_text']
combined_text = ' '.join(emotion_data.tolist())
emotion_texts[emotion_name] = combined_text
# Create word clouds
fig, axes = plt.subplots(2, 3, figsize=(20, 13))
axes = axes.ravel()
# Color schemes for each emotion
color_schemes = {
'Sadness': 'Blues',
'Joy': 'YlOrRd',
'Love': 'RdPu',
'Anger': 'Reds',
'Fear': 'Purples',
'Surprise': 'Oranges'
}
for idx, (emotion_name, text) in enumerate(emotion_texts.items()):
print(f" Creating word cloud for {emotion_name}...")
# Create word cloud
wordcloud = WordCloud(
width=800,
height=600,
background_color='white',
colormap=color_schemes[emotion_name],
max_words=100,
relative_scaling=0.5,
min_font_size=10,
collocations=False
).generate(text)
# Plot
axes[idx].imshow(wordcloud, interpolation='bilinear')
axes[idx].axis('off')
axes[idx].set_title(f'{emotion_name} - Word Cloud',
fontsize=16, fontweight='bold', pad=20)
# Add word count
word_count = len(text.split())
axes[idx].text(0.02, 0.98, f'Words: {word_count:,}',
transform=axes[idx].transAxes,
fontsize=12, fontweight='bold',
verticalalignment='top',
bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
plt.suptitle('Word Clouds for Each Emotion Category',
fontsize=18, fontweight='bold', y=0.98)
plt.tight_layout()
plt.savefig('emotion_wordclouds.png', dpi=300, bbox_inches='tight')
plt.show()
Creating word cloud for Sadness... Creating word cloud for Joy... Creating word cloud for Love... Creating word cloud for Anger... Creating word cloud for Fear... Creating word cloud for Surprise...
In [150]:
unique_words = {}
for emotion_name in emotion_labels.values():
# Get word frequencies for this emotion
emotion_words = Counter(emotion_texts[emotion_name].split())
# Get word frequencies for all other emotions combined
other_text = ' '.join([text for name, text in emotion_texts.items()
if name != emotion_name])
other_words = Counter(other_text.split())
# Find words that are unique to this emotion (ratio > 3)
unique = []
for word, count in emotion_words.most_common(100):
if count > 10: # Must appear at least 10 times
other_count = other_words.get(word, 1)
ratio = count / other_count
if ratio > 2.0: # Appears at least 2x more in this emotion
unique.append((word, count, ratio))
unique_words[emotion_name] = sorted(unique, key=lambda x: x[2], reverse=True)[:15]
# Display unique words
print("\n UNIQUE/DISTINCTIVE WORDS FOR EACH EMOTION:")
print("=" * 80)
for emotion_name, words in unique_words.items():
print(f"\n{emotion_name}:")
print("-" * 80)
print(f"{'Word':<20} {'Frequency':<15} {'Uniqueness Ratio':<20}")
print("-" * 80)
for word, freq, ratio in words:
print(f"{word:<20} {freq:<15} {ratio:.2f}x")
# Visualize unique words
fig, axes = plt.subplots(2, 3, figsize=(20, 12))
axes = axes.ravel()
for idx, (emotion_name, words) in enumerate(unique_words.items()):
if words:
word_list, freq_list, ratio_list = zip(*words[:10])
# Color by ratio
colors_unique = plt.cm.RdYlGn(np.array(ratio_list) / max(ratio_list))
axes[idx].barh(range(len(word_list)), ratio_list, color=colors_unique,
alpha=0.8, edgecolor='black')
axes[idx].set_yticks(range(len(word_list)))
axes[idx].set_yticklabels(word_list)
axes[idx].set_xlabel('Uniqueness Ratio', fontsize=11, fontweight='bold')
axes[idx].set_title(f'{emotion_name} - Most Distinctive Words',
fontsize=13, fontweight='bold')
axes[idx].invert_yaxis()
axes[idx].grid(True, alpha=0.3, axis='x')
# Add value labels
for i, (word, ratio) in enumerate(zip(word_list, ratio_list)):
axes[idx].text(ratio + 0.1, i, f'{ratio:.2f}x',
va='center', fontweight='bold', fontsize=9)
plt.suptitle('Most Distinctive Words for Each Emotion (Uniqueness Ratio)',
fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('unique_words_by_emotion.png', dpi=300, bbox_inches='tight')
plt.show()
UNIQUE/DISTINCTIVE WORDS FOR EACH EMOTION: ================================================================================ Sadness: -------------------------------------------------------------------------------- Word Frequency Uniqueness Ratio -------------------------------------------------------------------------------- unwelcome 45 45.00x homesick 38 38.00x dull 37 37.00x needy 42 10.50x aching 39 9.75x boring 36 9.00x disturbed 42 5.25x missed 39 3.90x lonely 49 3.27x awful 42 3.23x Joy: -------------------------------------------------------------------------------- Word Frequency Uniqueness Ratio -------------------------------------------------------------------------------- brave 36 12.00x joyful 39 9.75x fantastic 37 6.17x creative 39 3.25x smart 35 3.18x confident 35 2.92x Love: -------------------------------------------------------------------------------- Word Frequency Uniqueness Ratio -------------------------------------------------------------------------------- tender 108 108.00x naughty 102 102.00x loyal 94 94.00x horny 90 90.00x delicate 80 80.00x fond 55 55.00x gentle 107 53.50x compassionate 52 52.00x affectionate 40 40.00x nostalgic 111 37.00x sympathetic 107 21.40x longing 112 18.67x romantic 92 18.40x caring 109 18.17x supportive 89 17.80x Anger: -------------------------------------------------------------------------------- Word Frequency Uniqueness Ratio -------------------------------------------------------------------------------- insulted 73 73.00x cranky 69 69.00x dissatisfied 64 64.00x greedy 63 63.00x resentful 59 59.00x wronged 56 56.00x envious 54 54.00x petty 44 44.00x distracted 70 35.00x dangerous 58 29.00x grumpy 56 28.00x violent 54 27.00x bitter 77 25.67x stubborn 45 22.50x rushed 67 22.33x Fear: -------------------------------------------------------------------------------- Word Frequency Uniqueness Ratio -------------------------------------------------------------------------------- intimidated 85 85.00x hesitant 79 79.00x threatened 79 79.00x uncertain 74 74.00x apprehensive 68 68.00x frightened 66 66.00x shaky 64 64.00x reluctant 60 60.00x doubtful 49 49.00x suspicious 49 49.00x frantic 40 40.00x paranoid 73 36.50x shaken 68 34.00x distressed 60 30.00x shy 83 27.67x Surprise: -------------------------------------------------------------------------------- Word Frequency Uniqueness Ratio -------------------------------------------------------------------------------- dazed 155 155.00x amazed 247 123.50x stunned 123 123.00x impressed 267 66.75x shocked 238 39.67x curious 244 30.50x surprised 216 19.64x funny 213 8.19x amazing 258 4.69x strange 252 3.04x weird 258 3.00x overwhelmed 258 2.24x
COMPREHENSIVE COMPARISION OF MODEL
In [151]:
# Get predictions for all models
models_predictions = {
'Naive Bayes (Baseline)': mnb_test_pred,
'Naive Bayes (Tuned)': best_mnb_pred,
'Logistic Regression (Baseline)': lr_test_pred,
'Logistic Regression (Tuned)': best_lr_pred,
'SVM (Baseline)': svm_test_pred,
'SVM (Tuned)': best_svm_pred,
'Ensemble (Baseline)': ensemble_test_pred,
'DistilBERT': bert_test_pred
}
# Calculate all metrics
comprehensive_metrics = []
for model_name, predictions in models_predictions.items():
metrics = {
'Model': model_name,
'Accuracy': accuracy_score(y_test, predictions),
'Precision_Macro': precision_score(y_test, predictions, average='macro'),
'Recall_Macro': recall_score(y_test, predictions, average='macro'),
'F1_Macro': f1_score(y_test, predictions, average='macro'),
'Precision_Weighted': precision_score(y_test, predictions, average='weighted'),
'Recall_Weighted': recall_score(y_test, predictions, average='weighted'),
'F1_Weighted': f1_score(y_test, predictions, average='weighted'),
}
# Determine category
if 'DistilBERT' in model_name:
metrics['Category'] = 'Deep Learning'
elif 'Tuned' in model_name:
metrics['Category'] = 'Tuned'
elif 'Ensemble' in model_name:
metrics['Category'] = 'Ensemble'
else:
metrics['Category'] = 'Baseline'
comprehensive_metrics.append(metrics)
# Create DataFrame
metrics_df = pd.DataFrame(comprehensive_metrics)
metrics_df = metrics_df.sort_values('F1_Macro', ascending=False)
print("\n COMPREHENSIVE METRICS TABLE:")
print(metrics_df.to_string(index=False))
COMPREHENSIVE METRICS TABLE:
Model Accuracy Precision_Macro Recall_Macro F1_Macro Precision_Weighted Recall_Weighted F1_Weighted Category
DistilBERT 0.9430 0.944187 0.943039 0.942864 0.944193 0.9430 0.942848 Deep Learning
Logistic Regression (Tuned) 0.9200 0.920761 0.920058 0.919459 0.920757 0.9200 0.919428 Tuned
SVM (Tuned) 0.9190 0.919813 0.919055 0.918472 0.919808 0.9190 0.918442 Tuned
Ensemble (Baseline) 0.9170 0.917725 0.917061 0.916483 0.917717 0.9170 0.916449 Ensemble
Logistic Regression (Baseline) 0.9160 0.916605 0.916058 0.915494 0.916595 0.9160 0.915459 Baseline
SVM (Baseline) 0.9130 0.913447 0.913051 0.912618 0.913434 0.9130 0.912586 Baseline
Naive Bayes (Tuned) 0.8905 0.893222 0.890567 0.889698 0.893221 0.8905 0.889663 Tuned
Naive Bayes (Baseline) 0.8835 0.886169 0.883566 0.882856 0.886168 0.8835 0.882822 Baseline
In [152]:
# Plot 2: Heatmap of All Metrics
fig, ax = plt.subplots(figsize=(14, 10))
# Prepare data for heatmap
heatmap_data = metrics_df[['Model', 'Accuracy', 'Precision_Macro',
'Recall_Macro', 'F1_Macro']].set_index('Model')
sns.heatmap(heatmap_data.T, annot=True, fmt='.4f', cmap='RdYlGn',
cbar_kws={'label': 'Score'}, linewidths=1, linecolor='black',
vmin=0.65, vmax=0.95, ax=ax)
ax.set_xlabel('', fontsize=12, fontweight='bold')
ax.set_ylabel('Metrics', fontsize=12, fontweight='bold')
ax.set_title('Model Performance Heatmap - All Metrics',
fontsize=14, fontweight='bold', pad=20)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.tight_layout()
plt.savefig('models_heatmap_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
In [153]:
fig, axes = plt.subplots(2, 2, figsize=(18, 14))
metrics_to_plot = [
('Accuracy', 'Accuracy'),
('F1_Macro', 'F1-Score (Macro)'),
('Precision_Macro', 'Precision (Macro)'),
('Recall_Macro', 'Recall (Macro)')
]
category_colors = {
'Baseline': '#3498db',
'Tuned': '#2ecc71',
'Ensemble': '#f39c12',
'Deep Learning': '#e74c3c'
}
for idx, (metric, title) in enumerate(metrics_to_plot):
ax = axes[idx // 2, idx % 2]
# Sort by this metric
sorted_df = metrics_df.sort_values(metric, ascending=True)
# Color by category
colors = [category_colors[cat] for cat in sorted_df['Category']]
bars = ax.barh(range(len(sorted_df)), sorted_df[metric],
color=colors, alpha=0.8, edgecolor='black')
ax.set_yticks(range(len(sorted_df)))
ax.set_yticklabels(sorted_df['Model'], fontsize=9)
ax.set_xlabel(title, fontsize=12, fontweight='bold')
ax.set_title(f'{title} Comparison', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')
ax.set_xlim([0.65, 0.95])
# Add value labels
for bar, value in zip(bars, sorted_df[metric]):
ax.text(value + 0.005, bar.get_y() + bar.get_height()/2,
f'{value:.4f}',
ha='left', va='center', fontweight='bold', fontsize=8)
# Add legend
from matplotlib.patches import Patch
legend_elements = [Patch(facecolor=color, label=cat, alpha=0.8)
for cat, color in category_colors.items()]
fig.legend(handles=legend_elements, loc='upper center',
bbox_to_anchor=(0.5, 0.98), ncol=4, fontsize=12)
plt.suptitle('Comprehensive Model Comparison - All Metrics',
fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.savefig('comprehensive_bar_comparison.png', dpi=300, bbox_inches='tight')
plt.show()
In [154]:
# Group by model type
progression = {
'Naive Bayes': [
('Baseline', mnb_test_acc, f1_score(y_test, mnb_test_pred, average='macro')),
('Tuned', best_mnb_acc, best_mnb_f1)
],
'Logistic Regression': [
('Baseline', lr_test_acc, f1_score(y_test, lr_test_pred, average='macro')),
('Tuned', best_lr_acc, best_lr_f1)
],
'SVM': [
('Baseline', svm_test_acc, f1_score(y_test, svm_test_pred, average='macro')),
('Tuned', best_svm_acc, best_svm_f1)
]
}
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for idx, (model_type, improvements) in enumerate(progression.items()):
ax = axes[idx]
stages = [imp[0] for imp in improvements]
accuracies = [imp[1] for imp in improvements]
f1_scores = [imp[2] for imp in improvements]
x = np.arange(len(stages))
width = 0.35
bars1 = ax.bar(x - width/2, accuracies, width, label='Accuracy',
alpha=0.8, color='#3498db')
bars2 = ax.bar(x + width/2, f1_scores, width, label='F1-Score',
alpha=0.8, color='#2ecc71')
ax.set_ylabel('Score', fontsize=12, fontweight='bold')
ax.set_title(f'{model_type}\nImprovement', fontsize=13, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(stages)
ax.legend()
ax.set_ylim([0.65, 0.85])
ax.grid(True, alpha=0.3, axis='y')
# Add value labels and improvement percentages
for bar_set in [bars1, bars2]:
for i, bar in enumerate(bar_set):
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.3f}',
ha='center', va='bottom', fontweight='bold', fontsize=9)
if i == 1: # Show improvement for tuned version
prev_height = bar_set[0].get_height()
improvement = ((height - prev_height) / prev_height) * 100
ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
f'(+{improvement:.1f}%)',
ha='center', va='bottom', fontsize=8, color='green')
plt.suptitle('Traditional ML Models - Baseline vs Tuned Comparison',
fontsize=15, fontweight='bold')
plt.tight_layout()
plt.savefig('baseline_vs_tuned_progression.png', dpi=300, bbox_inches='tight')
plt.show()
In [155]:
print("\n" + "=" * 80)
print("FINAL SUMMARY & RECOMMENDATIONS")
print("=" * 80)
# Get best model from each category
best_baseline = metrics_df[metrics_df['Category'] == 'Baseline'].iloc[0]
best_tuned = metrics_df[metrics_df['Category'] == 'Tuned'].iloc[0]
best_ensemble = metrics_df[metrics_df['Category'] == 'Ensemble'].iloc[0]
best_dl = metrics_df[metrics_df['Category'] == 'Deep Learning'].iloc[0]
summary_comparison = pd.DataFrame([
{
'Category': 'Best Baseline',
'Model': best_baseline['Model'],
'Accuracy': best_baseline['Accuracy'],
'F1-Score': best_baseline['F1_Macro']
},
{
'Category': 'Best Tuned',
'Model': best_tuned['Model'],
'Accuracy': best_tuned['Accuracy'],
'F1-Score': best_tuned['F1_Macro']
},
{
'Category': 'Best Ensemble',
'Model': best_ensemble['Model'],
'Accuracy': best_ensemble['Accuracy'],
'F1-Score': best_ensemble['F1_Macro']
},
{
'Category': 'Best Overall (DL)',
'Model': best_dl['Model'],
'Accuracy': best_dl['Accuracy'],
'F1-Score': best_dl['F1_Macro']
}
])
print(summary_comparison.to_string(index=False))
================================================================================
FINAL SUMMARY & RECOMMENDATIONS
================================================================================
Category Model Accuracy F1-Score
Best Baseline Logistic Regression (Baseline) 0.916 0.915494
Best Tuned Logistic Regression (Tuned) 0.920 0.919459
Best Ensemble Ensemble (Baseline) 0.917 0.916483
Best Overall (DL) DistilBERT 0.943 0.942864
In [156]:
# Visualize category winners
fig, ax = plt.subplots(figsize=(14, 8))
categories = summary_comparison['Category']
accuracy = summary_comparison['Accuracy']
f1 = summary_comparison['F1-Score']
x = np.arange(len(categories))
width = 0.35
bars1 = ax.bar(x - width/2, accuracy, width, label='Accuracy',
alpha=0.8, color='#3498db', edgecolor='black')
bars2 = ax.bar(x + width/2, f1, width, label='F1-Score',
alpha=0.8, color='#2ecc71', edgecolor='black')
ax.set_ylabel('Score', fontsize=12, fontweight='bold')
ax.set_title('Best Model in Each Category', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=15, ha='right')
ax.legend(fontsize=11)
ax.set_ylim([0.65, 0.95])
ax.grid(True, alpha=0.3, axis='y')
# Add value labels
for bars in [bars1, bars2]:
for bar in bars:
height = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2., height,
f'{height:.4f}',
ha='center', va='bottom', fontweight='bold', fontsize=10)
# Add model names below bars
for i, model in enumerate(summary_comparison['Model']):
ax.text(i, 0.67, model, ha='center', va='top',
fontsize=8, rotation=0, style='italic')
plt.tight_layout()
plt.savefig('category_winners_comparison.png', dpi=300, bbox_inches='tight')
plt.show()